| 1 | + | import requests,json |
| 2 | + | from bs4 import BeautifulSoup |
| 3 | + | import argparse |
| 4 | + | from tqdm import tqdm |
# Command-line interface: Tor SOCKS proxy address, output file, and the search query.
parser = argparse.ArgumentParser(
    description="Search several dark-web search engines over a Tor SOCKS proxy."
)
parser.add_argument(
    "--proxy",
    default="localhost:9050",
    type=str,
    help="Set Tor proxy (default: localhost:9050)",  # help text now matches the actual default
)
parser.add_argument(
    "--output",
    default="output.txt",
    type=str,
    help="Output File (default: output.txt)",
)
# The group was previously created but never used; --search was optional and a
# missing query crashed later with a TypeError during URL construction.
required = parser.add_argument_group("required arguments")
required.add_argument("--search", type=str, required=True, help="search")
args = parser.parse_args()

# socks5h:// makes requests resolve hostnames through the proxy too, which is
# required for .onion addresses.
proxies = {
    "http": "socks5h://{}".format(args.proxy),
    "https": "socks5h://{}".format(args.proxy),
}
| 13 | + | |
def clear(toclear):
    """Return *toclear* with every newline and space character deleted."""
    # str.translate with a deletion table removes both characters in one pass.
    return toclear.translate(str.maketrans("", "", "\n "))
def clearn(toclear):
    """Return *toclear* with each newline replaced by a single space."""
    # Splitting on "\n" and re-joining with " " is equivalent to replace("\n", " "):
    # consecutive newlines yield empty fields, which re-join as consecutive spaces.
    return " ".join(toclear.split("\n"))
| 18 | + | |
def _scrape_ahmia(result):
    """Collect name/link pairs from Ahmia's single results page into result['ahmia']."""
    ahmia = "http://msydqstlz2kzerdg.onion/search/?q=" + args.search
    response = requests.get(ahmia, proxies=proxies)
    soup = BeautifulSoup(response.text, 'html.parser')
    for item in tqdm(soup.findAll('li', attrs={'class': 'result'}), desc="Ahmia"):
        heading = item.find('h4')
        if heading is None or heading.find('a') is None:
            continue  # skip malformed result entries instead of crashing
        result['ahmia'].append({
            "name": clear(heading.get_text()),
            # Ahmia wraps targets in a redirect URL; strip the wrapper prefix.
            "link": heading.find('a')['href'].replace(
                "/search/search/redirect?search_term=search&redirect_url=", ""),
        })


def _scrape_torch(result):
    """Walk Torch's paginated results (capped at 99 pages) into result['urlTorch']."""
    base = "http://xmh57jrzrnw6insl.onion/4a1f6b371c/search.cgi?cmd=Search!&np={}&q={}"
    req = requests.get(base.format(1, args.search), proxies=proxies)
    soup = BeautifulSoup(req.text, 'html.parser')

    # Derive the page count from the "X - Y of Z" summary; default to 0 pages
    # when the summary (or the whole table) is absent, instead of crashing.
    page_count = 0
    table = soup.find("table", attrs={"width": "100%"})
    if table is not None:
        for small in table.findAll("small"):
            text = small.get_text()
            if "of" in text:
                total = clear(text.split("-")[1].split("of")[1])
                page_count = round(float(total) / 10)
    page_count = min(page_count, 99)  # Torch serves at most 99 pages

    for page in tqdm(range(1, page_count + 1), desc="Torch"):
        # BUG FIX: the original refetched page 1 (urlTorchNumber) on every
        # iteration, so pagination never advanced past the first page.
        try:
            req = requests.get(base.format(page, args.search), proxies=proxies)
            soup = BeautifulSoup(req.text, 'html.parser')
            for entry in soup.findAll('dl'):
                anchor = entry.find('a')
                if anchor is None:
                    continue
                result['urlTorch'].append({"name": clear(anchor.get_text()),
                                           "link": anchor['href']})
        except requests.RequestException:
            continue  # deliberate best-effort: a failed page should not abort the crawl


def _scrape_darksearch(result):
    """Walk the Darksearch JSON API (capped at 30 pages) into result['darksearch']."""
    first = requests.get("http://darksearch.io/api/search?query=" + args.search,
                         proxies=proxies)
    if first.status_code != 200:
        print("Rate limit darksearch.io !")
        return
    cookies = first.cookies
    page_count = min(first.json()['last_page'], 30)
    for page in tqdm(range(1, page_count + 1), desc="Darksearch io"):
        # BUG FIX: the original appended str(pageNumber) instead of the loop
        # variable, so it fetched the same page on every iteration.
        url = "http://darksearch.io/api/search?query={}&page={}".format(args.search, page)
        resp = requests.get(url, proxies=proxies, cookies=cookies)
        if resp.status_code == 200:
            for r in resp.json()['data']:
                result['darksearch'].append({"name": r["title"], "link": r["link"]})
        else:
            print("Rate limit darksearch.io !")


def _scrape_onionland(result):
    """Walk OnionLand result pages (up to 399) into result['onionland'], skipping ads."""
    for page in tqdm(range(1, 400), desc="OnionLand"):
        url = "http://3bbaaaccczcbdddz.onion/search?q={}&page={}".format(args.search, page)
        req = requests.get(url, proxies=proxies)
        if req.status_code != 200:
            break  # non-200 marks the end of the available pages
        soup = BeautifulSoup(req.text, 'html.parser')
        for entry in soup.findAll('div', attrs={"class": "result-block"}):
            # NOTE(review): membership test on a Tag inspects its children; this
            # mirrors the original ad filter — confirm it actually skips ad blocks.
            if '''<span class="label-ad">Ad</span>''' not in entry:
                title = entry.find('div', attrs={'class': "title"})
                link = entry.find('div', attrs={'class': "link"})
                if title is None or link is None:
                    continue
                result['onionland'].append({"name": title.get_text(),
                                            "link": clear(link.get_text())})


def _export(result):
    """Print per-engine counts and write all collected links to args.output."""
    print("Ahmia : " + str(len(result['ahmia'])))
    print("Torch : " + str(len(result['urlTorch'])))
    print("Darksearch io : " + str(len(result['darksearch'])))
    print("Onionland : " + str(len(result['onionland'])))
    total = (len(result['ahmia']) + len(result['urlTorch'])
             + len(result['darksearch']) + len(result['onionland']))
    print("Total of {} links !\nExported to {}".format(str(total), args.output))
    # Context manager guarantees the file is closed even if a write fails;
    # the format string is now consistent across all four engines.
    with open(args.output, "w+") as f:
        for key in ('urlTorch', 'onionland', 'ahmia', 'darksearch'):
            for entry in result[key]:
                f.write("name : {} link : {}\n".format(clearn(entry["name"]),
                                                      entry["link"]))


def scrape():
    """Query four dark-web search engines over Tor and export the merged results.

    Side effects: network requests through the configured Tor proxy, progress
    bars on stdout, and a results file written to args.output.
    """
    # Pre-create every key so the report/export never raises KeyError, even
    # when an engine is skipped (e.g. Darksearch rate limiting).
    result = {'ahmia': [], 'urlTorch': [], 'darksearch': [], 'onionland': []}
    _scrape_ahmia(result)
    _scrape_torch(result)
    _scrape_darksearch(result)
    _scrape_onionland(result)
    _export(result)


if __name__ == "__main__":
    scrape()
| 108 | + | |