Projects STRLCPY OnionSearch Commits ea95d3a4
🤬
  • ■ ■ ■ ■ ■ ■
    search.py
     1 +import requests,json
     2 +from bs4 import BeautifulSoup
     3 +import argparse
     4 +from tqdm import tqdm
     5 +parser = argparse.ArgumentParser()
     6 +required = parser.add_argument_group('required arguments')
     7 +parser.add_argument("--proxy", default='localhost:9050', type=str, help="Set Tor proxy (default: 127.0.0.1:9050)")
     8 +parser.add_argument("--output", default='output.txt', type=str, help="Output File (default: output.txt)")
     9 + 
     10 +parser.add_argument("--search",type=str, help="search")
     11 +args = parser.parse_args()
     12 +proxies = {'http': 'socks5h://{}'.format(args.proxy), 'https': 'socks5h://{}'.format(args.proxy)}
     13 + 
     14 +def clear(toclear):
     15 + return(toclear.replace("\n","").replace(" ",""))
     16 +def clearn(toclear):
     17 + return(toclear.replace("\n"," "))
     18 + 
     19 +def scrape():
     20 + result = {}
     21 + ahmia = "http://msydqstlz2kzerdg.onion/search/?q="+args.search
     22 + response = requests.get(ahmia, proxies=proxies)
     23 + #print(response)
     24 + soup = BeautifulSoup(response.text, 'html.parser')
     25 + result['ahmia'] = []
     26 + #pageNumber = clear(soup.find("span", id="pageResultEnd").get_text())
     27 + for i in tqdm(soup.findAll('li', attrs = {'class' : 'result'}),desc="Ahmia"):
     28 + i = i.find('h4')
     29 + result['ahmia'].append({"name":clear(i.get_text()),"link":i.find('a')['href'].replace("/search/search/redirect?search_term=search&redirect_url=","")})
     30 + 
     31 + urlTorchNumber = "http://xmh57jrzrnw6insl.onion/4a1f6b371c/search.cgi?cmd=Search!&np=1&q="
     32 + req = requests.get(urlTorchNumber+args.search,proxies=proxies)
     33 + soup = BeautifulSoup(req.text, 'html.parser')
     34 + result['urlTorch'] = []
     35 + pageNumber = ""
     36 + for i in soup.find("table",attrs={"width":"100%"}).findAll("small"):
     37 + if("of"in i.get_text()):
     38 + pageNumber = i.get_text()
     39 + pageNumber = round(float(clear(pageNumber.split("-")[1].split("of")[1]))/10)
     40 + if(pageNumber>99):
     41 + pageNumber=99
     42 + result['urlTorch'] = []
     43 + for n in tqdm(range(1,pageNumber+1),desc="Torch"):
     44 + urlTorch = "http://xmh57jrzrnw6insl.onion/4a1f6b371c/search.cgi?cmd=Search!&np={}&q={}".format(n,args.search)
     45 + #print(urlTorch)
     46 + try:
     47 + req = requests.get(urlTorchNumber+args.search,proxies=proxies)
     48 + soup = BeautifulSoup(req.text, 'html.parser')
     49 + for i in soup.findAll('dl'):
     50 + result['urlTorch'].append({"name":clear(i.find('a').get_text()),"link":i.find('a')['href']})
     51 + except:
     52 + pass
     53 + 
     54 + darksearchnumber = "http://darksearch.io/api/search?query="
     55 + req = requests.get(darksearchnumber+args.search,proxies=proxies)
     56 + cookies = req.cookies
     57 + if(req.status_code==200):
     58 + result['darksearch']=[]
     59 + #print(req)
     60 + req = req.json()
     61 + if(req['last_page']>30):
     62 + pageNumber=30
     63 + else:
     64 + pageNumber=req['last_page']
     65 + #print(pageNumber)
     66 + for i in tqdm(range(1,pageNumber+1),desc="Darksearch io"):
     67 + #print(i)
     68 + darksearch = "http://darksearch.io/api/search?query={}&page=".format(args.search)
     69 + req = requests.get(darksearch+str(pageNumber),proxies=proxies,cookies=cookies)
     70 + if(req.status_code==200):
     71 + for r in req.json()['data']:
     72 + result['darksearch'].append({"name":r["title"],"link":r["link"]})
     73 + 
     74 + else:
     75 + print("Rate limit darksearch.io !")
     76 + 
     77 + result['onionland'] = []
     78 + for n in tqdm(range(1,400),desc="OnionLand"):
     79 + onionland = "http://3bbaaaccczcbdddz.onion/search?q={}&page={}".format(args.search,n)
     80 + #print(urlTorch)
     81 + req = requests.get(onionland,proxies=proxies)
     82 + if(req.status_code==200):
     83 + soup = BeautifulSoup(req.text, 'html.parser')
     84 + for i in soup.findAll('div',attrs={"class":"result-block"}):
     85 + if('''<span class="label-ad">Ad</span>''' not in i):
     86 + #print({"name":i.find('div',attrs={'class':"title"}).get_text(),"link":clear(i.find('div',attrs={'class':"link"}).get_text())})
     87 + result['onionland'].append({"name":i.find('div',attrs={'class':"title"}).get_text(),"link":clear(i.find('div',attrs={'class':"link"}).get_text())})
     88 + else:
     89 + break
     90 + 
     91 + print("Ahmia : " + str(len(result['ahmia'])))
     92 + print("Torch : "+str(len(result['urlTorch'])))
     93 + print("Darksearch io : "+str(len(result['darksearch'])))
     94 + print("Onionland : "+str(len(result['onionland'])))
     95 + print("Total of {} links !\nExported to {}".format(str(len(result['ahmia'])+len(result['urlTorch'])+len(result['darksearch'])+len(result['onionland'])),args.output))
     96 + f= open(args.output,"w+")
     97 + for i in result['urlTorch']:
     98 + f.write("name : {} link: {}\n".format(clearn(i["name"]),i["link"]))
     99 + for i in result['onionland']:
     100 + f.write("name: {} link : {}\n".format(clearn(i["name"]),i["link"]))
     101 + for i in result['ahmia']:
     102 + f.write("name : {} link : {}\n".format(clearn(i["name"]),i["link"]))
     103 + for i in result['darksearch']:
     104 + f.write("name : {} link : {}\n".format(clearn(i["name"]),i["link"]))
     105 + 
     106 + f.close()
     107 +scrape()
     108 + 
Please wait...
Page is in error, reload to recover