OnionSearch commit e0134368
  • Scraping mechanism improved; 3 search engines added.

  • Gobarigo committed 4 years ago
    e0134368
    1 parent 1b29436b
  • README.md
    skipped 18 lines
    19 19  - Dark Search Enginer
    20 20  - Phobos
    21 21  - Onion Search Server
    22  -- Grams
     22 +- Grams (x2)
    23 23  - Candle
    24  -- Tor Search Engine
    25  -- Torgle
     24 +- Tor Search Engine (x2)
     25 +- Torgle (x2)
    26 26  - Onion Search Engine
    27 27  - Tordex
    28 28  - Tor66
    skipped 137 lines
  • requirements.txt
     1 +requests
    1 2  beautifulsoup4
    2 3  tqdm
    3 4  argparse
    4  - 
     5 +html5lib
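    The html5lib dependency added above backs a parser swap in search.py: every BeautifulSoup call now uses 'html5lib' instead of 'html.parser'. A minimal sketch of the swap (the markup and URL are made up); html5lib builds the same lenient tree a browser would, which copes better with the often broken HTML returned by onion search engines:

        from bs4 import BeautifulSoup

        html = "<p><a href='http://example.onion'>hit</a>"   # deliberately unclosed markup
        soup = BeautifulSoup(html, "html5lib")               # requires the html5lib package
        print(soup.select("p a")[0]["href"])                 # -> http://example.onion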
  • search.py
    skipped 4 lines
    5 5  import time
    6 6  from datetime import datetime
    7 7  from random import choice
     8 +from tqdm import tqdm
    8 9   
    9 10  import requests
     11 +import urllib.parse as urlparse
     12 +from urllib.parse import parse_qs
     13 +from urllib.parse import quote
     14 +from urllib.parse import unquote
    10 15  from bs4 import BeautifulSoup
    11  -from tqdm import tqdm
     16 + 
     17 +import engines
    12 18   
    13 19  desktop_agents = [
    14 20   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
    skipped 12 lines
    27 33   'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0'
    28 34  ]
    29 35   
    30  -supported_engines = [
    31  - "ahmia",
    32  - "torch",
    33  - "darksearchio",
    34  - "onionland",
    35  - "notevil",
    36  - "visitor",
    37  - "darksearchenginer",
    38  - "phobos",
    39  - "onionsearchserver",
    40  - "grams",
    41  - "candle",
    42  - "torsearchengine",
    43  - "torgle",
    44  - "onionsearchengine",
    45  - "tordex",
    46  - "tor66",
    47  - "tormax",
    48  - "haystack",
    49  - "multivac",
    50  - "evosearch",
    51  - "oneirun",
    52  - "deeplink",
    53  -]
     36 +supported_engines = engines.ENGINES
    54 37   
    55 38  available_csv_fields = [
    56 39   "engine",
    57 40   "name",
    58 41   "link",
    59 42   "domain"
    60  - # Todo: add description, but needs modify scraping (link_finder func) for all the engines
    61 43  ]
    62 44   
    63 45   
    skipped 3 lines
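    supported_engines is no longer a hard-coded list: search.py now imports it from a new engines module (supported_engines = engines.ENGINES). That module is not part of this diff; judging from how it is used (indexed by engine name to build URLs, iterated with .keys() for the epilog), it is a dict mapping each engine name to its base .onion URL. A hypothetical sketch:

        # engines.py (hypothetical contents, inferred from how search.py uses ENGINES)
        ENGINES = {
            "ahmia": "http://msydqstlz2kzerdg.onion",
            "torch": "http://xmh57jrzrnw6insl.onion",
            # ... one entry per engine, including the new torsearchengine1,
            # torgle1 and grams1 entries added by this commit
        }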
    67 49   epilog += " {}".format(f)
    68 50   epilog += "\n"
    69 51   epilog += "Supported engines: \n\t"
    70  - for e in supported_engines:
     52 + for e in supported_engines.keys():
    71 53   epilog += " {}".format(e)
    72 54   return epilog
    73 55   
    skipped 15 lines
    89 71   nargs="*")
    90 72  parser.add_argument("--field_delimiter", type=str, default=",", help='Delimiter for the CSV fields')
    91 73   
     74 + 
    92 75  args = parser.parse_args()
    93 76  proxies = {'http': 'socks5h://{}'.format(args.proxy), 'https': 'socks5h://{}'.format(args.proxy)}
    94 77  tqdm_bar_format = "{desc}: {percentage:3.0f}% |{bar}| {n_fmt:3s} / {total_fmt:3s} [{elapsed:5s} < {remaining:5s}]"
    skipped 14 lines
    109 92   return str
    110 93   
    111 94   
     95 +def get_parameter(url, parameter_name):
     96 + parsed = urlparse.urlparse(url)
     97 + return parse_qs(parsed.query)[parameter_name][0]
     98 + 
     99 + 
    112 100  def ahmia(searchstr):
    113  - ahmia_url = "http://msydqstlz2kzerdg.onion/search/?q={}"
     101 + ahmia_url = supported_engines['ahmia'] + "/search/?q={}"
    114 102   
    115 103   with tqdm(total=1, initial=0, desc="%20s" % "Ahmia", unit="req", ascii=False, ncols=120,
    116 104   bar_format=tqdm_bar_format) as progress_bar:
    117  - response = requests.get(ahmia_url.format(searchstr), proxies=proxies, headers=random_headers())
    118  - soup = BeautifulSoup(response.text, 'html.parser')
     105 + response = requests.get(ahmia_url.format(quote(searchstr)), proxies=proxies, headers=random_headers())
     106 + soup = BeautifulSoup(response.text, 'html5lib')
    119 107   link_finder("ahmia", soup)
    120 108   progress_bar.update()
    121 109   progress_bar.close()
    122 110   
    123 111   
    124 112  def torch(searchstr):
    125  - torch_url = "http://xmh57jrzrnw6insl.onion/4a1f6b371c/search.cgi?cmd=Search!&np={}&q={}"
     113 + torch_url = supported_engines['torch'] + "/4a1f6b371c/search.cgi?cmd=Search!&np={}&q={}"
    126 114   results_per_page = 10
    127 115   max_nb_page = 100
    128 116   if args.limit != 0:
    skipped 3 lines
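    The new get_parameter helper defined above is reused later in link_finder to unwrap redirect-style result links (notevil, evosearch, haystack, onionland and onionsearchengine all route hrefs through it). A self-contained illustration; the sample link is hypothetical:

        import urllib.parse as urlparse
        from urllib.parse import parse_qs

        def get_parameter(url, parameter_name):
            parsed = urlparse.urlparse(url)
            return parse_qs(parsed.query)[parameter_name][0]

        link = "http://example.onion/r2d.php?url=http%3A%2F%2Ftarget.onion%2F&rank=1"
        print(get_parameter(link, "url"))   # -> http://target.onion/ (parse_qs also percent-decodes)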
    132 120   s.proxies = proxies
    133 121   s.headers = random_headers()
    134 122   
    135  - req = s.get(torch_url.format(0, searchstr))
    136  - soup = BeautifulSoup(req.text, 'html.parser')
     123 + req = s.get(torch_url.format(0, quote(searchstr)))
     124 + soup = BeautifulSoup(req.text, 'html5lib')
    137 125   
    138 126   page_number = 1
    139 127   for i in soup.find("table", attrs={"width": "100%"}).find_all("small"):
    skipped 10 lines
    150 138   
    151 139   # Usually range is 2 to n+1, but TORCH behaves differently
    152 140   for n in range(1, page_number):
    153  - req = s.get(torch_url.format(n, searchstr))
    154  - soup = BeautifulSoup(req.text, 'html.parser')
     141 + req = s.get(torch_url.format(n, quote(searchstr)))
     142 + soup = BeautifulSoup(req.text, 'html5lib')
    155 143   link_finder("torch", soup)
    156 144   progress_bar.update()
    157 145   
    skipped 3 lines
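    The other change repeated across every scraper above and below is that the search string is now passed through urllib.parse.quote before being formatted into the engine URL, so spaces and reserved characters are percent-encoded. For example (hypothetical query and URL):

        from urllib.parse import quote

        searchstr = "privacy & security"
        url = "http://example.onion/search/?q={}"
        print(url.format(quote(searchstr)))   # -> http://example.onion/search/?q=privacy%20%26%20security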
    161 149  def darksearchio(searchstr):
    162 150   global result
    163 151   result['darksearchio'] = []
    164  - darksearchio_url = "http://darksearch.io/api/search?query={}&page={}"
     152 + darksearchio_url = supported_engines['darksearchio'] + "/api/search?query={}&page={}"
    165 153   max_nb_page = 30
    166 154   if args.limit != 0:
    167 155   max_nb_page = args.limit
    skipped 1 lines
    169 157   with requests.Session() as s:
    170 158   s.proxies = proxies
    171 159   s.headers = random_headers()
    172  - resp = s.get(darksearchio_url.format(searchstr, 1))
     160 + resp = s.get(darksearchio_url.format(quote(searchstr), 1))
    173 161   
    174 162   page_number = 1
    175 163   if resp.status_code == 200:
    skipped 12 lines
    188 176   progress_bar.update()
    189 177   
    190 178   for n in range(2, page_number + 1):
    191  - resp = s.get(darksearchio_url.format(searchstr, n))
     179 + resp = s.get(darksearchio_url.format(quote(searchstr), n))
    192 180   if resp.status_code == 200:
    193 181   resp = resp.json()
    194 182   link_finder("darksearchio", resp['data'])
    skipped 6 lines
    201 189   
    202 190   
    203 191  def onionland(searchstr):
    204  - onionlandv3_url = "http://3bbad7fauom4d6sgppalyqddsqbf5u5p56b5k5uk2zxsy3d6ey2jobad.onion/search?q={}&page={}"
     192 + onionlandv3_url = supported_engines['onionland'] + "/search?q={}&page={}"
    205 193   max_nb_page = 100
    206 194   if args.limit != 0:
    207 195   max_nb_page = args.limit
    skipped 2 lines
    210 198   s.proxies = proxies
    211 199   s.headers = random_headers()
    212 200   
    213  - resp = s.get(onionlandv3_url.format(searchstr, 1))
    214  - soup = BeautifulSoup(resp.text, 'html.parser')
     201 + resp = s.get(onionlandv3_url.format(quote(searchstr), 1))
     202 + soup = BeautifulSoup(resp.text, 'html5lib')
    215 203   
    216 204   page_number = 1
    217 205   for i in soup.find_all('div', attrs={"class": "search-status"}):
    skipped 17 lines
    235 223   progress_bar.update()
    236 224   
    237 225   for n in range(2, page_number + 1):
    238  - resp = s.get(onionlandv3_url.format(searchstr, n))
    239  - soup = BeautifulSoup(resp.text, 'html.parser')
     226 + resp = s.get(onionlandv3_url.format(quote(searchstr), n))
     227 + soup = BeautifulSoup(resp.text, 'html5lib')
    240 228   ret = link_finder("onionland", soup)
    241 229   if ret < 0:
    242 230   break
    skipped 3 lines
    246 234   
    247 235   
    248 236  def notevil(searchstr):
    249  - notevil_url1 = "http://hss3uro2hsxfogfq.onion/index.php?q={}"
    250  - notevil_url2 = "http://hss3uro2hsxfogfq.onion/index.php?q={}&hostLimit=20&start={}&numRows={}&template=0"
     237 + notevil_url1 = supported_engines['notevil'] + "/index.php?q={}"
     238 + notevil_url2 = supported_engines['notevil'] + "/index.php?q={}&hostLimit=20&start={}&numRows={}&template=0"
    251 239   max_nb_page = 20
    252 240   if args.limit != 0:
    253 241   max_nb_page = args.limit
    254 242   
    255 243   # Do not use requests.Session() here (by experience less results would be got)
    256  - req = requests.get(notevil_url1.format(searchstr), proxies=proxies, headers=random_headers())
    257  - soup = BeautifulSoup(req.text, 'html.parser')
     244 + req = requests.get(notevil_url1.format(quote(searchstr)), proxies=proxies, headers=random_headers())
     245 + soup = BeautifulSoup(req.text, 'html5lib')
    258 246   
    259 247   page_number = 1
    260 248   last_div = soup.find("div", attrs={"style": "text-align:center"}).find("div", attrs={"style": "text-align:center"})
    skipped 12 lines
    273 261   
    274 262   for n in range(2, page_number + 1):
    275 263   start = (int(n - 1) * num_rows)
    276  - req = requests.get(notevil_url2.format(searchstr, start, num_rows),
     264 + req = requests.get(notevil_url2.format(quote(searchstr), start, num_rows),
    277 265   proxies=proxies,
    278 266   headers=random_headers())
    279  - soup = BeautifulSoup(req.text, 'html.parser')
     267 + soup = BeautifulSoup(req.text, 'html5lib')
    280 268   link_finder("notevil", soup)
    281 269   progress_bar.update()
    282 270   time.sleep(1)
    skipped 2 lines
    285 273   
    286 274   
    287 275  def visitor(searchstr):
    288  - visitor_url = "http://visitorfi5kl7q7i.onion/search/?q={}&page={}"
     276 + visitor_url = supported_engines['visitor'] + "/search/?q={}&page={}"
    289 277   max_nb_page = 30
    290 278   if args.limit != 0:
    291 279   max_nb_page = args.limit
    skipped 12 lines
    304 292   s.headers = random_headers()
    305 293   
    306 294   while continue_processing:
    307  - resp = s.get(visitor_url.format(searchstr, page_to_request))
    308  - soup = BeautifulSoup(resp.text, 'html.parser')
     295 + resp = s.get(visitor_url.format(quote(searchstr), page_to_request))
     296 + soup = BeautifulSoup(resp.text, 'html5lib')
    309 297   link_finder("visitor", soup)
    310 298   progress_bar.update()
    311 299   
    skipped 7 lines
    319 307   
    320 308   
    321 309  def darksearchenginer(searchstr):
    322  - darksearchenginer_url = "http://7pwy57iklvt6lyhe.onion/"
     310 + darksearchenginer_url = supported_engines['darksearchenginer']
    323 311   max_nb_page = 20
    324 312   if args.limit != 0:
    325 313   max_nb_page = args.limit
    skipped 5 lines
    331 319   
    332 320   # Note that this search engine is very likely to timeout
    333 321   resp = s.post(darksearchenginer_url, data={"search[keyword]": searchstr, "page": page_number})
    334  - soup = BeautifulSoup(resp.text, 'html.parser')
     322 + soup = BeautifulSoup(resp.text, 'html5lib')
    335 323   
    336 324   pages_input = soup.find_all("input", attrs={"name": "page"})
    337 325   for i in pages_input:
    skipped 9 lines
    347 335   
    348 336   for n in range(2, page_number + 1):
    349 337   resp = s.post(darksearchenginer_url, data={"search[keyword]": searchstr, "page": str(n)})
    350  - soup = BeautifulSoup(resp.text, 'html.parser')
     338 + soup = BeautifulSoup(resp.text, 'html5lib')
    351 339   link_finder("darksearchenginer", soup)
    352 340   progress_bar.update()
    353 341   
    skipped 1 lines
    355 343   
    356 344   
    357 345  def phobos(searchstr):
    358  - phobos_url = "http://phobosxilamwcg75xt22id7aywkzol6q6rfl2flipcqoc4e4ahima5id.onion/search?query={}&p={}"
     346 + phobos_url = supported_engines['phobos'] + "/search?query={}&p={}"
    359 347   max_nb_page = 100
    360 348   if args.limit != 0:
    361 349   max_nb_page = args.limit
    skipped 2 lines
    364 352   s.proxies = proxies
    365 353   s.headers = random_headers()
    366 354   
    367  - resp = s.get(phobos_url.format(searchstr, 1), proxies=proxies, headers=random_headers())
    368  - soup = BeautifulSoup(resp.text, 'html.parser')
     355 + resp = s.get(phobos_url.format(quote(searchstr), 1), proxies=proxies, headers=random_headers())
     356 + soup = BeautifulSoup(resp.text, 'html5lib')
    369 357   
    370 358   page_number = 1
    371 359   pages = soup.find("div", attrs={"class": "pages"}).find_all('a')
    skipped 10 lines
    382 370   progress_bar.update()
    383 371   
    384 372   for n in range(2, page_number + 1):
    385  - resp = s.get(phobos_url.format(searchstr, n), proxies=proxies, headers=random_headers())
    386  - soup = BeautifulSoup(resp.text, 'html.parser')
     373 + resp = s.get(phobos_url.format(quote(searchstr), n), proxies=proxies, headers=random_headers())
     374 + soup = BeautifulSoup(resp.text, 'html5lib')
    387 375   link_finder("phobos", soup)
    388 376   progress_bar.update()
    389 377   
    skipped 1 lines
    391 379   
    392 380   
    393 381  def onionsearchserver(searchstr):
    394  - onionsearchserver_url1 = "http://oss7wrm7xvoub77o.onion/oss/"
     382 + onionsearchserver_url1 = supported_engines['onionsearchserver'] + "/oss/"
    395 383   onionsearchserver_url2 = None
    396 384   results_per_page = 10
    397 385   max_nb_page = 100
    skipped 5 lines
    403 391   s.headers = random_headers()
    404 392   
    405 393   resp = s.get(onionsearchserver_url1)
    406  - soup = BeautifulSoup(resp.text, 'html.parser')
     394 + soup = BeautifulSoup(resp.text, 'html5lib')
    407 395   for i in soup.find_all('iframe', attrs={"style": "display:none;"}):
    408 396   onionsearchserver_url2 = i['src'] + "{}&page={}"
    409 397   
    410 398   if onionsearchserver_url2 is None:
    411 399   return -1
    412 400   
    413  - resp = s.get(onionsearchserver_url2.format(searchstr, 1))
    414  - soup = BeautifulSoup(resp.text, 'html.parser')
     401 + resp = s.get(onionsearchserver_url2.format(quote(searchstr), 1))
     402 + soup = BeautifulSoup(resp.text, 'html5lib')
    415 403   
    416 404   page_number = 1
    417 405   pages = soup.find_all("div", attrs={"class": "osscmnrdr ossnumfound"})
    skipped 10 lines
    428 416   progress_bar.update()
    429 417   
    430 418   for n in range(2, page_number + 1):
    431  - resp = s.get(onionsearchserver_url2.format(searchstr, n))
    432  - soup = BeautifulSoup(resp.text, 'html.parser')
     419 + resp = s.get(onionsearchserver_url2.format(quote(searchstr), n))
     420 + soup = BeautifulSoup(resp.text, 'html5lib')
    433 421   link_finder("onionsearchserver", soup)
    434 422   progress_bar.update()
    435 423   
    skipped 2 lines
    438 426   
    439 427  def grams(searchstr):
    440 428   # No multi pages handling as it is very hard to get many results on this engine
    441  - grams_url1 = "http://grams7enqfy4nieo.onion/"
    442  - grams_url2 = "http://grams7enqfy4nieo.onion/results"
     429 + grams_url1 = supported_engines['grams']
     430 + grams_url2 = supported_engines['grams'] + "/results"
    443 431   
    444 432   with requests.Session() as s:
    445 433   s.proxies = proxies
    446 434   s.headers = random_headers()
    447 435   
    448 436   resp = s.get(grams_url1)
    449  - soup = BeautifulSoup(resp.text, 'html.parser')
     437 + soup = BeautifulSoup(resp.text, 'html5lib')
    450 438   token = soup.find('input', attrs={'name': '_token'})['value']
    451 439   
    452 440   with tqdm(total=1, initial=0, desc="%20s" % "Grams", unit="req", ascii=False, ncols=120,
    453 441   bar_format=tqdm_bar_format) as progress_bar:
    454 442   resp = s.post(grams_url2, data={"req": searchstr, "_token": token})
    455  - soup = BeautifulSoup(resp.text, 'html.parser')
     443 + soup = BeautifulSoup(resp.text, 'html5lib')
    456 444   link_finder("grams", soup)
    457 445   progress_bar.update()
    458 446   progress_bar.close()
    459 447   
    460 448   
    461 449  def candle(searchstr):
    462  - candle_url = "http://gjobjn7ievumcq6z.onion/?q={}"
     450 + candle_url = supported_engines['candle'] + "/?q={}"
    463 451   
    464 452   with tqdm(total=1, initial=0, desc="%20s" % "Candle", unit="req", ascii=False, ncols=120,
    465 453   bar_format=tqdm_bar_format) as progress_bar:
    466  - response = requests.get(candle_url.format(searchstr), proxies=proxies, headers=random_headers())
    467  - soup = BeautifulSoup(response.text, 'html.parser')
     454 + response = requests.get(candle_url.format(quote(searchstr)), proxies=proxies, headers=random_headers())
     455 + soup = BeautifulSoup(response.text, 'html5lib')
    468 456   link_finder("candle", soup)
    469 457   progress_bar.update()
    470 458   progress_bar.close()
    471 459   
    472 460   
    473 461  def torsearchengine(searchstr):
    474  - torsearchengine_url = "http://searchcoaupi3csb.onion/search/move/?q={}&pn={}&num=10&sdh=&"
     462 + torsearchengine_url = supported_engines['torsearchengine'] + "/search/move/?q={}&pn={}&num=10&sdh=&"
    475 463   max_nb_page = 100
    476 464   if args.limit != 0:
    477 465   max_nb_page = args.limit
    skipped 2 lines
    480 468   s.proxies = proxies
    481 469   s.headers = random_headers()
    482 470   
    483  - resp = s.get(torsearchengine_url.format(searchstr, 1))
    484  - soup = BeautifulSoup(resp.text, 'html.parser')
     471 + resp = s.get(torsearchengine_url.format(quote(searchstr), 1))
     472 + soup = BeautifulSoup(resp.text, 'html5lib')
    485 473   
    486 474   page_number = 1
    487 475   for i in soup.find_all('div', attrs={"id": "subheader"}):
    skipped 11 lines
    499 487   progress_bar.update()
    500 488   
    501 489   for n in range(2, page_number + 1):
    502  - resp = s.get(torsearchengine_url.format(searchstr, n))
    503  - soup = BeautifulSoup(resp.text, 'html.parser')
     490 + resp = s.get(torsearchengine_url.format(quote(searchstr), n))
     491 + soup = BeautifulSoup(resp.text, 'html5lib')
    504 492   ret = link_finder("torsearchengine", soup)
    505 493   progress_bar.update()
    506 494   
    skipped 1 lines
    508 496   
    509 497   
    510 498  def torgle(searchstr):
    511  - torgle_url = "http://torglejzid2cyoqt.onion/search.php?term={}"
     499 + torgle_url = supported_engines['torgle'] + "/search.php?term={}"
    512 500   
    513 501   with tqdm(total=1, initial=0, desc="%20s" % "Torgle", unit="req", ascii=False, ncols=120,
    514 502   bar_format=tqdm_bar_format) as progress_bar:
    515  - response = requests.get(torgle_url.format(searchstr), proxies=proxies, headers=random_headers())
    516  - soup = BeautifulSoup(response.text, 'html.parser')
     503 + response = requests.get(torgle_url.format(quote(searchstr)), proxies=proxies, headers=random_headers())
     504 + soup = BeautifulSoup(response.text, 'html5lib')
    517 505   link_finder("torgle", soup)
    518 506   progress_bar.update()
    519 507   progress_bar.close()
    520 508   
    521 509   
    522 510  def onionsearchengine(searchstr):
    523  - onionsearchengine_url = "http://onionf4j3fwqpeo5.onion/search.php?search={}&submit=Search&page={}"
     511 + onionsearchengine_url = supported_engines['onionsearchengine'] + "/search.php?search={}&submit=Search&page={}"
     512 + # same as onionsearchengine_url = "http://5u56fjmxu63xcmbk.onion/search.php?search={}&submit=Search&page={}"
    524 513   max_nb_page = 100
    525 514   if args.limit != 0:
    526 515   max_nb_page = args.limit
    skipped 2 lines
    529 518   s.proxies = proxies
    530 519   s.headers = random_headers()
    531 520   
    532  - resp = s.get(onionsearchengine_url.format(searchstr, 1))
    533  - soup = BeautifulSoup(resp.text, 'html.parser')
     521 + resp = s.get(onionsearchengine_url.format(quote(searchstr), 1))
     522 + soup = BeautifulSoup(resp.text, 'html5lib')
    534 523   
    535 524   page_number = 1
    536 525   approx_re = re.search(r"\s([0-9]+)\sresult[s]?\sfound\s!.*", clear(soup.find('body').get_text()))
    skipped 11 lines
    548 537   progress_bar.update()
    549 538   
    550 539   for n in range(2, page_number + 1):
    551  - resp = s.get(onionsearchengine_url.format(searchstr, n))
    552  - soup = BeautifulSoup(resp.text, 'html.parser')
     540 + resp = s.get(onionsearchengine_url.format(quote(searchstr), n))
     541 + soup = BeautifulSoup(resp.text, 'html5lib')
    553 542   link_finder("onionsearchengine", soup)
    554 543   progress_bar.update()
    555 544   
    skipped 1 lines
    557 546   
    558 547   
    559 548  def tordex(searchstr):
    560  - tordex_url = "http://tordex7iie7z2wcg.onion/search?query={}&page={}"
     549 + tordex_url = supported_engines['tordex'] + "/search?query={}&page={}"
    561 550   max_nb_page = 100
    562 551   if args.limit != 0:
    563 552   max_nb_page = args.limit
    skipped 2 lines
    566 555   s.proxies = proxies
    567 556   s.headers = random_headers()
    568 557   
    569  - resp = s.get(tordex_url.format(searchstr, 1))
    570  - soup = BeautifulSoup(resp.text, 'html.parser')
     558 + resp = s.get(tordex_url.format(quote(searchstr), 1))
     559 + soup = BeautifulSoup(resp.text, 'html5lib')
    571 560   
    572 561   page_number = 1
    573 562   pages = soup.find_all("li", attrs={"class": "page-item"})
    skipped 11 lines
    585 574   progress_bar.update()
    586 575   
    587 576   for n in range(2, page_number + 1):
    588  - resp = s.get(tordex_url.format(searchstr, n))
    589  - soup = BeautifulSoup(resp.text, 'html.parser')
     577 + resp = s.get(tordex_url.format(quote(searchstr), n))
     578 + soup = BeautifulSoup(resp.text, 'html5lib')
    590 579   link_finder("tordex", soup)
    591 580   progress_bar.update()
    592 581   
    skipped 1 lines
    594 583   
    595 584   
    596 585  def tor66(searchstr):
    597  - tor66_url = "http://tor66sezptuu2nta.onion/search?q={}&sorttype=rel&page={}"
     586 + tor66_url = supported_engines['tor66'] + "/search?q={}&sorttype=rel&page={}"
    598 587   max_nb_page = 30
    599 588   if args.limit != 0:
    600 589   max_nb_page = args.limit
    skipped 2 lines
    603 592   s.proxies = proxies
    604 593   s.headers = random_headers()
    605 594   
    606  - resp = s.get(tor66_url.format(searchstr, 1))
    607  - soup = BeautifulSoup(resp.text, 'html.parser')
     595 + resp = s.get(tor66_url.format(quote(searchstr), 1))
     596 + soup = BeautifulSoup(resp.text, 'html5lib')
    608 597   
    609 598   page_number = 1
    610 599   approx_re = re.search(r"\.Onion\ssites\sfound\s:\s([0-9]+)",
    skipped 12 lines
    623 612   progress_bar.update()
    624 613   
    625 614   for n in range(2, page_number + 1):
    626  - resp = s.get(tor66_url.format(searchstr, n))
    627  - soup = BeautifulSoup(resp.text, 'html.parser')
     615 + resp = s.get(tor66_url.format(quote(searchstr), n))
     616 + soup = BeautifulSoup(resp.text, 'html5lib')
    628 617   link_finder("tor66", soup)
    629 618   progress_bar.update()
    630 619   
    skipped 1 lines
    632 621   
    633 622   
    634 623  def tormax(searchstr):
    635  - tormax_url = "http://tormaxunodsbvtgo.onion/tormax/search?q={}"
     624 + tormax_url = supported_engines['tormax'] + "/tormax/search?q={}"
    636 625   
    637 626   with tqdm(total=1, initial=0, desc="%20s" % "Tormax", unit="req", ascii=False, ncols=120,
    638 627   bar_format=tqdm_bar_format) as progress_bar:
    639  - response = requests.get(tormax_url.format(searchstr), proxies=proxies, headers=random_headers())
    640  - soup = BeautifulSoup(response.text, 'html.parser')
     628 + response = requests.get(tormax_url.format(quote(searchstr)), proxies=proxies, headers=random_headers())
     629 + soup = BeautifulSoup(response.text, 'html5lib')
    641 630   link_finder("tormax", soup)
    642 631   progress_bar.update()
    643 632   progress_bar.close()
    644 633   
    645 634   
    646 635  def haystack(searchstr):
    647  - haystack_url = "http://haystakvxad7wbk5.onion/?q={}&offset={}"
     636 + haystack_url = supported_engines['haystack'] + "/?q={}&offset={}"
    648 637   # At the 52nd page, it timeouts 100% of the time
    649 638   max_nb_page = 50
    650 639   if args.limit != 0:
    skipped 4 lines
    655 644   s.proxies = proxies
    656 645   s.headers = random_headers()
    657 646   
    658  - req = s.get(haystack_url.format(searchstr, 0))
    659  - soup = BeautifulSoup(req.text, 'html.parser')
     647 + req = s.get(haystack_url.format(quote(searchstr), 0))
     648 + soup = BeautifulSoup(req.text, 'html5lib')
    660 649   
    661 650   bar_max = None
    662 651   if args.barmode == "fixed":
    skipped 11 lines
    674 663   it = 1
    675 664   while continue_processing:
    676 665   offset = int(it * offset_coeff)
    677  - req = s.get(haystack_url.format(searchstr, offset))
    678  - soup = BeautifulSoup(req.text, 'html.parser')
     666 + req = s.get(haystack_url.format(quote(searchstr), offset))
     667 + soup = BeautifulSoup(req.text, 'html5lib')
    679 668   ret = link_finder("haystack", soup)
    680 669   progress_bar.update()
    681 670   it += 1
    skipped 2 lines
    684 673   
    685 674   
    686 675  def multivac(searchstr):
    687  - multivac_url = "http://multivacigqzqqon.onion/?q={}&page={}"
     676 + multivac_url = supported_engines['multivac'] + "/?q={}&page={}"
    688 677   max_nb_page = 10
    689 678   if args.limit != 0:
    690 679   max_nb_page = args.limit
    skipped 3 lines
    694 683   s.headers = random_headers()
    695 684   
    696 685   page_to_request = 1
    697  - req = s.get(multivac_url.format(searchstr, page_to_request))
    698  - soup = BeautifulSoup(req.text, 'html.parser')
     686 + req = s.get(multivac_url.format(quote(searchstr), page_to_request))
     687 + soup = BeautifulSoup(req.text, 'html5lib')
    699 688   
    700 689   bar_max = None
    701 690   if args.barmode == "fixed":
    skipped 10 lines
    712 701   
    713 702   while continue_processing:
    714 703   page_to_request += 1
    715  - req = s.get(multivac_url.format(searchstr, page_to_request))
    716  - soup = BeautifulSoup(req.text, 'html.parser')
     704 + req = s.get(multivac_url.format(quote(searchstr), page_to_request))
     705 + soup = BeautifulSoup(req.text, 'html5lib')
    717 706   ret = link_finder("multivac", soup)
    718 707   progress_bar.update()
    719 708   
    skipped 2 lines
    722 711   
    723 712   
    724 713  def evosearch(searchstr):
    725  - evosearch_url = "http://evo7no6twwwrm63c.onion/evo/search.php?" \
     714 + evosearch_url = supported_engines['evosearch'] + "/evo/search.php?" \
    726 715   "query={}&" \
    727 716   "start={}&" \
    728 717   "search=1&type=and&mark=bold+text&" \
    skipped 7 lines
    736 725   s.proxies = proxies
    737 726   s.headers = random_headers()
    738 727   
    739  - req = s.get(evosearch_url.format(searchstr, 1, results_per_page))
    740  - soup = BeautifulSoup(req.text, 'html.parser')
     728 + req = s.get(evosearch_url.format(quote(searchstr), 1, results_per_page))
     729 + soup = BeautifulSoup(req.text, 'html5lib')
    741 730   
    742 731   page_number = 1
    743 732   i = soup.find("p", attrs={"class": "cntr"})
    744 733   if i is not None:
    745 734   if i.get_text() is not None and "of" in i.get_text():
    746 735   nb_res = float(clear(str.split(i.get_text().split("-")[1].split("of")[1])[0]))
    747  - # The results page loads in two times, it is hard not to lose the second part
    748  - page_number = math.ceil(nb_res / (results_per_page / 2))
     736 + page_number = math.ceil(nb_res / results_per_page)
    749 737   if page_number > max_nb_page:
    750 738   page_number = max_nb_page
    751 739   
    skipped 4 lines
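    The hunk above also drops the old divide-by-two correction in the evosearch page count, so the number of pages is now a plain ceiling division of the result count by the page size. With hypothetical numbers:

        import math

        nb_res, results_per_page = 53, 20
        print(math.ceil(nb_res / (results_per_page / 2)))   # old formula: 6 pages requested
        print(math.ceil(nb_res / results_per_page))         # new formula: 3 pages requested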
    756 744   progress_bar.update()
    757 745   
    758 746   for n in range(2, page_number + 1):
    759  - resp = s.get(evosearch_url.format(searchstr, n, results_per_page))
    760  - soup = BeautifulSoup(resp.text, 'html.parser')
     747 + resp = s.get(evosearch_url.format(quote(searchstr), n, results_per_page))
     748 + soup = BeautifulSoup(resp.text, 'html5lib')
    761 749   link_finder("evosearch", soup)
    762 750   progress_bar.update()
    763 751   
    skipped 1 lines
    765 753   
    766 754   
    767 755  def oneirun(searchstr):
    768  - oneirun_url = "http://oneirunda366dmfm.onion/Home/IndexEn"
     756 + oneirun_url = supported_engines['oneirun'] + "/Home/IndexEn"
    769 757   
    770 758   with requests.Session() as s:
    771 759   s.proxies = proxies
    772 760   s.headers = random_headers()
    773 761   
    774 762   resp = s.get(oneirun_url)
    775  - soup = BeautifulSoup(resp.text, 'html.parser')
     763 + soup = BeautifulSoup(resp.text, 'html5lib')
    776 764   token = soup.find('input', attrs={"name": "__RequestVerificationToken"})['value']
    777 765   
    778 766   with tqdm(total=1, initial=0, desc="%20s" % "Oneirun", unit="req", ascii=False, ncols=120,
    779 767   bar_format=tqdm_bar_format) as progress_bar:
    780  - response = s.post(oneirun_url.format(searchstr),
     768 + response = s.post(oneirun_url.format(quote(searchstr)),
    781 769   data={"searchString": searchstr, "__RequestVerificationToken": token})
    782  - soup = BeautifulSoup(response.text, 'html.parser')
     770 + soup = BeautifulSoup(response.text, 'html5lib')
    783 771   link_finder("oneirun", soup)
    784 772   progress_bar.update()
    785 773   progress_bar.close()
    786 774   
    787 775   
    788 776  def deeplink(searchstr):
    789  - deeplink_url1 = "http://deeplinkdeatbml7.onion/index.php"
    790  - deeplink_url2 = "http://deeplinkdeatbml7.onion/?search={}&type=verified"
     777 + deeplink_url1 = supported_engines['deeplink'] + "/index.php"
     778 + deeplink_url2 = supported_engines['deeplink'] + "/?search={}&type=verified"
    791 779   
    792 780   with requests.Session() as s:
    793 781   s.proxies = proxies
    skipped 2 lines
    796 784   
    797 785   with tqdm(total=1, initial=0, desc="%20s" % "DeepLink", unit="req", ascii=False, ncols=120,
    798 786   bar_format=tqdm_bar_format) as progress_bar:
    799  - response = s.get(deeplink_url2.format(searchstr))
    800  - soup = BeautifulSoup(response.text, 'html.parser')
     787 + response = s.get(deeplink_url2.format(quote(searchstr)))
     788 + soup = BeautifulSoup(response.text, 'html5lib')
    801 789   link_finder("deeplink", soup)
    802 790   progress_bar.update()
    803 791   progress_bar.close()
    804 792   
    805 793   
     794 +def torsearchengine1(searchstr):
     795 + torsearchengine1_url1 = supported_engines['torsearchengine1']
     796 + torsearchengine1_url2 = supported_engines['torsearchengine1'] + "/index.php"
     797 + 
     798 + with requests.Session() as s:
     799 + s.proxies = proxies
     800 + s.headers = random_headers()
     801 + s.get(torsearchengine1_url1)
     802 + 
     803 + with tqdm(total=1, initial=0, desc="%20s" % "TOR Search Engine", unit="req", ascii=False, ncols=120,
     804 + bar_format=tqdm_bar_format) as progress_bar:
     805 + response = s.post(torsearchengine1_url2, {'search': searchstr, 'search2': ''})
     806 + soup = BeautifulSoup(response.text, 'html5lib')
     807 + link_finder("torsearchengine1", soup)
     808 + progress_bar.update()
     809 + progress_bar.close()
     810 + 
     811 + 
     812 +def torgle1(searchstr):
     813 + torgle1_url = supported_engines['torgle1'] + "/torgle/index-frame.php?query={}&search=1&engine-ver=2&isframe=0{}"
     814 + results_per_page = 10
     815 + max_nb_page = 30
     816 + if args.limit != 0:
     817 + max_nb_page = args.limit
     818 + 
     819 + with requests.Session() as s:
     820 + s.proxies = proxies
     821 + s.headers = random_headers()
     822 + 
     823 + resp = s.get(torgle1_url.format(quote(searchstr), ""))
     824 + soup = BeautifulSoup(resp.text, 'html5lib')
     825 + 
     826 + page_number = 1
     827 + i = soup.find('div', attrs={"id": "result_report"})
     828 + if i is not None:
     829 + if i.get_text() is not None and "of" in i.get_text():
     830 + res_re = re.match(r".*of\s([0-9]+)\s.*", clear(i.get_text()))
     831 + total_results = int(res_re.group(1))
     832 + page_number = math.ceil(total_results / results_per_page)
     833 + if page_number > max_nb_page:
     834 + page_number = max_nb_page
     835 + 
     836 + with tqdm(total=page_number, initial=0, desc="%20s" % "Torgle", unit="req", ascii=False, ncols=120,
     837 + bar_format=tqdm_bar_format) as progress_bar:
     838 + 
     839 + link_finder("torgle1", soup)
     840 + progress_bar.update()
     841 + 
     842 + for n in range(2, page_number + 1):
     843 + start_page_param = "&start={}".format(n)
     844 + resp = s.get(torgle1_url.format(quote(searchstr), start_page_param))
     845 + soup = BeautifulSoup(resp.text, 'html5lib')
     846 + link_finder("torgle1", soup)
     847 + progress_bar.update()
     848 + 
     849 + progress_bar.close()
     850 + 
     851 + 
     852 +def grams1(searchstr):
     853 + grams1_url = supported_engines['grams1'] + "/results/index.php?page={}&searchstr={}"
     854 + results_per_page = 25
     855 + max_nb_page = 30
     856 + if args.limit != 0:
     857 + max_nb_page = args.limit
     858 + 
     859 + with requests.Session() as s:
     860 + s.proxies = proxies
     861 + s.headers = random_headers()
     862 + 
     863 + resp = s.get(grams1_url.format(1, quote(searchstr)))
     864 + soup = BeautifulSoup(resp.text, 'html5lib')
     865 + 
     866 + page_number = 1
     867 + pages = soup.find_all('div', attrs={"class": "result-text"})
     868 + if pages is not None:
     869 + res_re = re.match(r"About ([0-9]+) result(.*)", clear(pages[0].get_text()))
     870 + total_results = int(res_re.group(1))
     871 + page_number = math.ceil(total_results / results_per_page)
     872 + if page_number > max_nb_page:
     873 + page_number = max_nb_page
     874 + 
     875 + with tqdm(total=page_number, initial=0, desc="%20s" % "Grams", unit="req", ascii=False, ncols=120,
     876 + bar_format=tqdm_bar_format) as progress_bar:
     877 + 
     878 + link_finder("grams1", soup)
     879 + progress_bar.update()
     880 + 
     881 + for n in range(2, page_number + 1):
     882 + resp = s.get(grams1_url.format(n, quote(searchstr)))
     883 + soup = BeautifulSoup(resp.text, 'html5lib')
     884 + link_finder("grams1", soup)
     885 + progress_bar.update()
     886 + 
     887 + progress_bar.close()
     888 + 
     889 + 
    806 890  def get_domain_from_url(link):
    807 891   fqdn_re = r"^[a-z][a-z0-9+\-.]*://([a-z0-9\-._~%!$&'()*+,;=]+@)?([a-z0-9\-._~%]+|\[[a-z0-9\-._~%!$&'()*+,;=:]+\])"
    808 892   domain_re = re.match(fqdn_re, link)
    skipped 35 lines
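    The three scrapers added above (torsearchengine1, torgle1, grams1) follow the same skeleton as the existing ones. A condensed sketch of that pattern, relying on the script's module-level proxies, supported_engines, random_headers and link_finder; the engine name and search path are placeholders:

        def example_engine(searchstr):
            url = supported_engines['example'] + "/search?q={}"   # base URL comes from engines.ENGINES
            with requests.Session() as s:
                s.proxies = proxies                               # socks5h:// Tor proxy built from --proxy
                s.headers = random_headers()                      # random desktop User-Agent
                resp = s.get(url.format(quote(searchstr)))
                soup = BeautifulSoup(resp.text, 'html5lib')
                link_finder("example", soup)                      # collects {"name": ..., "link": ...} results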
    844 928   def append_link():
    845 929   nonlocal has_result
    846 930   has_result = True
    847  - 
    848 931   result[engine_str].append({"name": name, "link": link})
    849 932   
    850 933   if args.continuous_write and csv_file.writable():
    851  - csv_writer = csv.writer(csv_file, delimiter=field_delim, quoting=csv.QUOTE_NONNUMERIC)
     934 + csv_writer = csv.writer(csv_file, delimiter=field_delim, quoting=csv.QUOTE_ALL)
    852 935   fields = {"engine": engine_str, "name": name, "link": link}
    853 936   write_to_csv(csv_writer, fields)
    854 937   
    skipped 1 lines
    856 939   result[engine_str] = []
    857 940   
    858 941   if engine_str == "ahmia":
    859  - for i in data_obj.find_all('li', attrs={'class': 'result'}):
    860  - i = i.find('h4')
    861  - name = clear(i.get_text())
    862  - link = i.find('a')['href'].replace("/search/search/redirect?search_term={}&redirect_url="
    863  - .format(args.search), "")
     942 + for r in data_obj.select('li.result h4'):
     943 + name = clear(r.get_text())
     944 + link = r.find('a')['href'].split('redirect_url=')[1]
    864 945   append_link()
    865 946   
    866 947   if engine_str == "candle":
    867  - html_page = data_obj.find('html')
    868  - if html_page:
    869  - for i in data_obj.find('html').find_all('a'):
    870  - if str(i['href']).startswith("http"):
    871  - name = clear(i.get_text())
    872  - link = clear(i['href'])
    873  - append_link()
     948 + for r in data_obj.select("body h2 a"):
     949 + if str(r['href']).startswith("http"):
     950 + name = clear(r.get_text())
     951 + link = clear(r['href'])
     952 + append_link()
    874 953   
    875 954   if engine_str == "darksearchenginer":
    876  - for i in data_obj.find('div', attrs={"class": "table-responsive"}).find_all('a'):
    877  - name = clear(i.get_text())
    878  - link = clear(i['href'])
     955 + for r in data_obj.select('.table-responsive a'):
     956 + name = clear(r.get_text())
     957 + link = clear(r['href'])
    879 958   append_link()
    880 959   
    881 960   if engine_str == "darksearchio":
    skipped 11 lines
    893 972   append_link()
    894 973   
    895 974   if engine_str == "evosearch":
    896  - if data_obj.find('div', attrs={"id": "results"}) is not None:
    897  - for div in data_obj.find('div', attrs={"id": "results"}).find_all('div', attrs={"class": "odrow"}):
    898  - name = clear(div.find('div', attrs={"class": "title"}).find('a').get_text())
    899  - link = clear(div.find('div', attrs={"class": "title"}).find('a')['href']
    900  - .replace("./include/click_counter.php?url=", "")
    901  - .replace("&query={}".format(args.search), ""))
    902  - append_link()
     975 + for r in data_obj.select("#results .title a"):
     976 + name = clear(r.get_text())
     977 + link = get_parameter(r['href'], 'url')
     978 + append_link()
    903 979   
    904 980   if engine_str == "grams":
    905 981   for i in data_obj.find_all("div", attrs={"class": "media-body"}):
    906 982   if not i.find('span'):
    907  - for j in i.find_all('a'):
    908  - if str(j.get_text()).startswith("http"):
    909  - link = j.get_text()
    910  - else:
    911  - name = j.get_text()
    912  - append_link()
     983 + for r in i.select(".searchlinks a"):
     984 + name = clear(r.get_text())
     985 + link = clear(r['href'])
     986 + append_link()
     987 + 
     988 + if engine_str == "grams1":
     989 + for r in data_obj.select(".searchlinks a"):
     990 + name = clear(r.get_text())
     991 + link = clear(r['href'])
     992 + append_link()
    913 993   
    914 994   if engine_str == "haystack":
    915  - if data_obj.find('div', attrs={"class": "result"}) is not None:
    916  - for div in data_obj.find_all('div', attrs={"class": "result"}):
    917  - if div.find('a') is not None and div.find('i') is not None:
    918  - name = clear(div.find('a').get_text())
    919  - link = clear(div.find('i').get_text())
    920  - append_link()
     995 + for r in data_obj.select(".result b a"):
     996 + name = clear(r.get_text())
     997 + link = get_parameter(r['href'], 'url')
     998 + append_link()
    921 999   
    922 1000   if engine_str == "multivac":
    923  - for i in data_obj.find_all('dl'):
    924  - link_tag = i.find('a')
    925  - if link_tag:
    926  - if link_tag['href'] != "":
    927  - name = clear(link_tag.get_text())
    928  - link = clear(link_tag['href'])
    929  - append_link()
    930  - else:
    931  - break
     1001 + for r in data_obj.select("dl dt a"):
     1002 + if r['href'] != "":
     1003 + name = clear(r.get_text())
     1004 + link = clear(r['href'])
     1005 + append_link()
     1006 + else:
     1007 + break
    932 1008   
    933 1009   if engine_str == "notevil":
    934  - ''' As for OnionLand, we could use the span instead of the href to get a beautiful link
    935  - However some useful links are shown under the "li" tag,
    936  - and there we would not be able to have a sanitized version
    937  - Thus, the best is to implement a generic sanitize function. '''
    938  - for i in data_obj.find_all('p'):
    939  - name = clear(i.find('a').get_text())
    940  - link = i.find('a')['href'].replace("./r2d.php?url=", "")
    941  - append_link()
    942  - for i in data_obj.find_all('li'):
    943  - name = clear(i.find('a').get_text())
    944  - link = i.find('a')['href'].replace("./r2d.php?url=", "")
     1010 + for r in data_obj.select('#content > div > p > a:not([target])'):
     1011 + name = clear(r.get_text())
     1012 + link = get_parameter(r['href'], 'url')
    945 1013   append_link()
    946 1014   
    947 1015   if engine_str == "oneirun":
    skipped 3 lines
    951 1019   append_link()
    952 1020   
    953 1021   if engine_str == "onionland":
    954  - if not data_obj.find('div', attrs={"class": "row no-result-row"}):
    955  - for i in data_obj.find_all('div', attrs={"class": "result-block"}):
    956  - if not str(clear(i.find('div', attrs={'class': "title"}).find('a')['href'])).startswith("/ads"):
    957  - name = clear(i.find('div', attrs={'class': "title"}).get_text())
    958  - link = clear(i.find('div', attrs={'class': "link"}).get_text())
    959  - append_link()
     1022 + for r in data_obj.select('.result-block .title a'):
     1023 + if not r['href'].startswith('/ads/'):
     1024 + name = clear(r.get_text())
     1025 + link = unquote(unquote(get_parameter(r['href'], 'l')))
     1026 + append_link()
    960 1027   
    961 1028   if engine_str == "onionsearchengine":
    962  - for i in data_obj.find_all('table'):
    963  - for j in i.find_all('a'):
    964  - if str(j['href']).startswith("url.php?u=") and not str(j.get_text()).startswith("http://"):
    965  - name = clear(j.get_text())
    966  - link = clear(str(j['href']).replace("url.php?u=", ""))
    967  - append_link()
     1029 + for r in data_obj.select("table a b"):
     1030 + name = clear(r.get_text())
     1031 + link = get_parameter(r.parent['href'], 'u')
     1032 + append_link()
    968 1033   
    969 1034   if engine_str == "onionsearchserver":
    970  - for i in data_obj.find_all('div', attrs={"class": "osscmnrdr ossfieldrdr1"}):
    971  - name = clear(i.find('a').get_text())
    972  - link = clear(i.find('a')['href'])
     1035 + for r in data_obj.select('.osscmnrdr.ossfieldrdr1 a'):
     1036 + name = clear(r.get_text())
     1037 + link = clear(r['href'])
    973 1038   append_link()
    974 1039   
    975 1040   if engine_str == "phobos":
    976  - links = data_obj.find('div', attrs={"class": "serp"}).find_all('a', attrs={"class": "titles"})
    977  - for i in links:
    978  - name = clear(i.get_text())
    979  - link = clear(i['href'])
     1041 + for r in data_obj.select('.serp .titles'):
     1042 + name = clear(r.get_text())
     1043 + link = clear(r['href'])
    980 1044   append_link()
    981 1045   
    982 1046   if engine_str == "tor66":
    skipped 4 lines
    987 1051   append_link()
    988 1052   
    989 1053   if engine_str == "torch":
    990  - for i in data_obj.find_all('dl'):
    991  - name = clear(i.find('a').get_text())
    992  - link = i.find('a')['href']
     1054 + for r in data_obj.select("dl > dt > a"):
     1055 + name = clear(r.get_text())
     1056 + link = clear(r['href'])
    993 1057   append_link()
    994 1058   
    995 1059   if engine_str == "tordex":
    996  - for i in data_obj.find_all('div', attrs={"class": "result mb-3"}):
    997  - a_link = i.find('h5').find('a')
    998  - name = clear(a_link.get_text())
    999  - link = clear(a_link['href'])
     1060 + for r in data_obj.select('.container h5 a'):
     1061 + name = clear(r.get_text())
     1062 + link = clear(r['href'])
    1000 1063   append_link()
    1001 1064   
    1002 1065   if engine_str == "torgle":
    skipped 5 lines
    1008 1071   name = clear(j.get_text())
    1009 1072   append_link()
    1010 1073   
     1074 + if engine_str == "torgle1":
     1075 + for r in data_obj.select("#results a.title"):
     1076 + name = clear(r.get_text())
     1077 + link = clear(r['href'])
     1078 + append_link()
     1079 + 
    1011 1080   if engine_str == "tormax":
    1012  - for i in data_obj.find_all('article'):
    1013  - if i.find('a') is not None and i.find('div') is not None:
    1014  - link = clear(i.find('div', attrs={"class": "url"}).get_text())
    1015  - name = clear(i.find('a', attrs={"class": "title"}).get_text())
    1016  - append_link()
     1081 + for r in data_obj.select("#search-results article a.title"):
     1082 + name = clear(r.get_text())
     1083 + link = clear(r.find_next_sibling('div', {'class': 'url'}).get_text())
     1084 + append_link()
    1017 1085   
    1018 1086   if engine_str == "torsearchengine":
    1019 1087   for i in data_obj.find_all('h3', attrs={'class': 'title text-truncate'}):
    skipped 1 lines
    1021 1089   link = i.find('a')['data-uri']
    1022 1090   append_link()
    1023 1091   
     1092 + if engine_str == "torsearchengine1":
     1093 + for r in data_obj.find_all('span', {'style': 'font-size:1.2em;font-weight:bold;color:#1a0dab'}):
     1094 + name = clear(r.get_text())
     1095 + link = r.find_next_sibling('a')['href']
     1096 + append_link()
     1097 + 
    1024 1098   if engine_str == "visitor":
    1025  - li_tags = data_obj.find_all('li', attrs={'class': 'hs_site'})
    1026  - for i in li_tags:
    1027  - h3tags = i.find_all('h3')
    1028  - for n in h3tags:
    1029  - name = clear(n.find('a').get_text())
    1030  - link = n.find('a')['href']
    1031  - append_link()
     1099 + for r in data_obj.select(".hs_site h3 a"):
     1100 + name = clear(r.get_text())
     1101 + link = clear(r['href'])
     1102 + append_link()
    1032 1103   
    1033 1104   if args.continuous_write and not csv_file.closed:
    1034 1105   csv_file.close()
    skipped 35 lines
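    Most of the link_finder changes above replace nested find()/find_all() chains with single CSS selectors via select(), and unwrap redirect links through get_parameter (onionland additionally double-unquotes its 'l' parameter). A condensed before/after on made-up markup:

        from bs4 import BeautifulSoup

        html = '<div class="result-block"><div class="title"><a href="http://target.onion">Example</a></div></div>'
        soup = BeautifulSoup(html, "html5lib")

        # old style: nested lookups with attrs dicts
        for block in soup.find_all("div", attrs={"class": "result-block"}):
            a = block.find("div", attrs={"class": "title"}).find("a")
            print(a.get_text(), a["href"])

        # new style: a single CSS selector per engine
        for a in soup.select(".result-block .title a"):
            print(a.get_text(), a["href"])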
    1070 1141   except KeyError:
    1071 1142   print("Error: search engine {} not in the list of supported engines".format(e))
    1072 1143   else:
    1073  - for e in supported_engines:
     1144 + for e in supported_engines.keys():
    1074 1145   if not (args.exclude and len(args.exclude) > 0 and e in args.exclude[0]):
    1075 1146   call_func_as_str(e, args.search)
    1076 1147   
    skipped 1 lines
    1078 1149   
    1079 1150   if not args.continuous_write:
    1080 1151   with open(filename, 'w', newline='') as csv_file:
    1081  - csv_writer = csv.writer(csv_file, delimiter=field_delim, quoting=csv.QUOTE_NONNUMERIC)
     1152 + csv_writer = csv.writer(csv_file, delimiter=field_delim, quoting=csv.QUOTE_ALL)
    1082 1153   for engine in result.keys():
    1083 1154   for i in result[engine]:
    1084 1155   i['engine'] = engine
    skipped 14 lines
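    Both CSV writers switch from csv.QUOTE_NONNUMERIC to csv.QUOTE_ALL. For the all-string rows written here the output is identical; the difference is that QUOTE_ALL also quotes numeric fields. Quick comparison on a made-up row:

        import csv, sys

        row = ["ahmia", "Example result", 42]
        csv.writer(sys.stdout, quoting=csv.QUOTE_NONNUMERIC).writerow(row)   # "ahmia","Example result",42
        csv.writer(sys.stdout, quoting=csv.QUOTE_ALL).writerow(row)          # "ahmia","Example result","42"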