1 | 1 | | from googlesearch import search |
| 2 | + | from datetime import datetime |
2 | 3 | | import requests,re,PyPDF2,tabula,os |
3 | 4 | | |
4 | 5 | | def PDFcheck(mail,_verbose=None): |
5 | 6 | | domain = mail.split("@")[1] |
6 | 7 | | term = "site:"+domain+" filetype:PDF intext:"+'"'+"email"+'"' |
| 8 | + | emails = "" |
7 | 9 | | try: |
8 | 10 | | data = search(term, num_results=5) |
9 | 11 | | for i in data: |
| skipped 10 lines |
20 | 22 | | if(findPDFs[0] is not None): |
21 | 23 | | for pdfs in findPDFs: |
22 | 24 | | print(pdfs) |
| 25 | + | emails = emails + "\n" + pdfs |
| 26 | + | with open((datetime.today().strftime('%Y-%m-%d-'+domain)+".txt"), "w") as f: |
| 27 | + | f.write(emails) |
| 28 | + | f.close() |
23 | 29 | | except: |
24 | 30 | | pass |
25 | 31 | | pdfFileObj.close() |
| skipped 4 lines |
30 | 36 | | os.remove("out.txt") |
31 | 37 | | except: |
32 | 38 | | print("PDF Search error!") |
| 39 | + | |