STRLCPY/maigret

Merge pull request #28 from soxoj/reports
```
Reports refactoring & improving
```
soxoj committed with GitHub 4 years ago

bc284474

2 parents
2d4ef5d1
5bffa830

Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing is complete)

■ ■ ■ ■ ■ ■

maigret/__main__.py

		skipped 4 lines
5	5		"""
6	6
7	7		import asyncio
8		-	import maigret
9	8
	9	+	import maigret
10	10
11	11		if __name__ == "__main__":
12	12		asyncio.run(maigret.main())
		skipped 1 lines

■ ■ ■ ■ ■ ■

maigret/maigret.py

		skipped 1 lines
2	2		Maigret main module
3	3		"""
4	4
	5	+	import aiohttp
5	6		import asyncio
6	7		import csv
7	8		import http.cookiejar as cookielib
		skipped 2 lines
10	11		import os
11	12		import platform
12	13		import re
	14	+	import requests
13	15		import ssl
14	16		import sys
	17	+	import tqdm.asyncio
	18	+	import xmind
	19	+	from aiohttp_socks import ProxyConnector
15	20		from argparse import ArgumentParser, RawDescriptionHelpFormatter
16	21		from http.cookies import SimpleCookie
17		-
18		-	import aiohttp
19		-	from aiohttp_socks import ProxyConnector
	22	+	from mock import Mock
20	23		from python_socks import _errors as proxy_errors
21		-	import requests
22		-	import tqdm.asyncio
23		-	from mock import Mock
24		-	from socid_extractor import parse, extract
	24	+	from socid_extractor import parse, extract, __version__ as socid_version
25	25
26	26		from .activation import ParsingActivator
27	27		from .notify import QueryNotifyPrint
	28	+	from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
	29	+	generate_report_context, save_txt_report
28	30		from .result import QueryResult, QueryStatus
29	31		from .sites import MaigretDatabase, MaigretSite
30		-	from .report import save_csv_report, genxmindfile, save_html_pdf_report
31		-
32		-	import xmind
33	32
34	33		__version__ = '0.1.10'
35	34
		skipped 481 lines
517	516		return timeout
518	517
519	518
520		-	async def site_self_check(site, logger, semaphore, db: MaigretDatabase, no_progressbar=False):
	519	+	async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
521	520		query_notify = Mock()
522	521		changes = {
523	522		'disabled': False,
		skipped 55 lines
579	578		if changes['disabled'] != site.disabled:
580	579		site.disabled = changes['disabled']
581	580		db.update_site(site)
582		-	action = 'Disabled' if not site.disabled else 'Enabled'
583		-	print(f'{action} site {site.name}...')
	581	+	if not silent:
	582	+	action = 'Disabled' if not site.disabled else 'Enabled'
	583	+	print(f'{action} site {site.name}...')
584	584
585	585		return changes
586	586
587	587
588		-	async def self_check(db: MaigretDatabase, site_data: dict, logger):
	588	+	async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False):
589	589		sem = asyncio.Semaphore(10)
590	590		tasks = []
591	591		all_sites = site_data
		skipped 4 lines
596	596		disabled_old_count = disabled_count(all_sites.values())
597	597
598	598		for _, site in all_sites.items():
599		-	check_coro = site_self_check(site, logger, sem, db)
	599	+	check_coro = site_self_check(site, logger, sem, db, silent)
600	600		future = asyncio.ensure_future(check_coro)
601	601		tasks.append(future)
602	602
		skipped 9 lines
612	612		message = 'Enabled'
613	613		total_disabled *= -1
614	614
615		-	print(f'{message} {total_disabled} checked sites. Run with `--info` flag to get more information')
	615	+	if not silent:
	616	+	print(f'{message} {total_disabled} checked sites. Run with `--info` flag to get more information')
616	617
617	618
618	619		async def main():
619		-	version_string = f"%(prog)s {__version__}\n" + \
620		-	f"{requests.__description__}: {requests.__version__}\n" + \
621		-	f"Python: {platform.python_version()}"
	620	+	version_string = '\n'.join([
	621	+	f'%(prog)s {__version__}',
	622	+	f'Socid-extractor: {socid_version}',
	623	+	f'Aiohttp: {aiohttp.__version__}',
	624	+	f'Requests: {requests.__version__}',
	625	+	f'Python: {platform.python_version()}',
	626	+	])
622	627
623	628		parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
624	629		description=f"Maigret v{__version__}"
		skipped 2 lines
627	632		action="version", version=version_string,
628	633		help="Display version information and dependencies."
629	634		)
630		-	parser.add_argument("--info",
	635	+	parser.add_argument("--info", "-vv",
631	636		action="store_true", dest="info", default=False,
632	637		help="Display service information."
633	638		)
		skipped 1 lines
635	640		action="store_true", dest="verbose", default=False,
636	641		help="Display extra information and metrics."
637	642		)
638		-	parser.add_argument("-d", "--debug",
	643	+	parser.add_argument("-d", "--debug", "-vvv",
639	644		action="store_true", dest="debug", default=False,
640	645		help="Saving debugging information and sites responses in debug.txt."
641	646		)
642		-	parser.add_argument("--folderoutput", "-fo", dest="folderoutput", default="reports",
643		-	help="If using multiple usernames, the output of the results will be saved to this folder."
644		-	)
645		-	parser.add_argument("--csv",
646		-	action="store_true", dest="csv", default=False,
647		-	help="Create Comma-Separated Values (CSV) File."
648		-	)
649		-	parser.add_argument("--html",
650		-	action="store_true", dest="html", default=False,
651		-	help="Create HTML report file."
652		-	)
653	647		parser.add_argument("--site",
654	648		action="append", metavar='SITE_NAME',
655	649		dest="site_list", default=[],
		skipped 59 lines
715	709		dest="tags", default='',
716	710		help="Specify tags of sites."
717	711		)
718		-
719		-	parser.add_argument("-x","--xmind",
	712	+	# reports options
	713	+	parser.add_argument("--folderoutput", "-fo", dest="folderoutput", default="reports",
	714	+	help="If using multiple usernames, the output of the results will be saved to this folder."
	715	+	)
	716	+	parser.add_argument("-T", "--txt",
	717	+	action="store_true", dest="txt", default=False,
	718	+	help="Create a TXT report (one report per username)."
	719	+	)
	720	+	parser.add_argument("-C", "--csv",
	721	+	action="store_true", dest="csv", default=False,
	722	+	help="Create a CSV report (one report per username)."
	723	+	)
	724	+	parser.add_argument("-H", "--html",
	725	+	action="store_true", dest="html", default=False,
	726	+	help="Create an HTML report file (general report on all usernames)."
	727	+	)
	728	+	parser.add_argument("-X","--xmind",
720	729		action="store_true",
721	730		dest="xmind", default=False,
722		-	help="Generate an xmind 8 mindmap"
	731	+	help="Generate an XMind 8 mindmap report (one report per username)."
723	732		)
724		-
725	733		parser.add_argument("-P", "--pdf",
726	734		action="store_true",
727	735		dest="pdf", default=False,
728		-	help="Generate a pdf report"
	736	+	help="Generate a PDF report (general report on all usernames)."
729	737		)
730	738
731	739		args = parser.parse_args()
		skipped 70 lines
802	810		else:
803	811		print('Updates will be applied only for current search session.')
804	812
	813	+	# Make the reports folder if it does not exist
	814	+	os.makedirs(args.folderoutput, exist_ok=True)
	815	+	report_path = args.folderoutput
	816	+
	817	+	# Define one report filename template
	818	+	report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
	819	+
805	820		# Database consistency
806	821		enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values())))
807	822		print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')
		skipped 47 lines
855	870		logger=logger,
856	871		forced=args.use_disabled_sites,
857	872		)
	873	+
	874	+	username_result = (username, id_type, results)
858	875		general_results.append((username, id_type, results))
859	876
860		-	if args.folderoutput:
861		-	# The usernames results should be stored in a targeted folder.
862		-	# If the folder doesn't exist, create it first
863		-	os.makedirs(args.folderoutput, exist_ok=True)
864		-	result_path = os.path.join(args.folderoutput, f"{username}.")
865		-	else:
866		-	result_path = os.path.join("reports", f"{username}.")
	877	+	# TODO: tests
	878	+	for website_name in results:
	879	+	dictionary = results[website_name]
	880	+	# TODO: fix no site data issue
	881	+	if not dictionary:
	882	+	continue
	883	+	new_usernames = dictionary.get('ids_usernames')
	884	+	if new_usernames:
	885	+	for u, utype in new_usernames.items():
	886	+	usernames[u] = utype
867	887
	888	+	# reporting for one username
868	889		if args.xmind:
869		-	genxmindfile(result_path+"xmind", username, results)
	890	+	filename = report_filepath_tpl.format(username=username, postfix='.xmind')
	891	+	save_xmind_report(filename, username, results)
	892	+	print(f'XMind report for {username} saved in {filename}')
870	893
	894	+	if args.csv:
	895	+	filename = report_filepath_tpl.format(username=username, postfix='.csv')
	896	+	save_csv_report(filename, username, results)
	897	+	print(f'CSV report for {username} saved in {filename}')
871	898
872		-	with open(result_path+"txt", "w", encoding="utf-8") as file:
873		-	exists_counter = 0
874		-	for website_name in results:
875		-	dictionary = results[website_name]
876		-	# TODO: fix no site data issue
877		-	if not dictionary:
878		-	continue
879		-	new_usernames = dictionary.get('ids_usernames')
880		-	if new_usernames:
881		-	for u, utype in new_usernames.items():
882		-	usernames[u] = utype
	899	+	if args.txt:
	900	+	filename = report_filepath_tpl.format(username=username, postfix='.txt')
	901	+	save_txt_report(filename, username, results)
	902	+	print(f'TXT report for {username} saved in {filename}')
883	903
884		-	if dictionary.get("status").status == QueryStatus.CLAIMED:
885		-	exists_counter += 1
886		-	file.write(dictionary["url_user"] + "\n")
887		-	file.write(f"Total Websites Username Detected On : {exists_counter}")
888		-	file.close()
	904	+	# reporting for all the results
	905	+	report_context = generate_report_context(general_results)
	906	+	# determine main username
	907	+	username = report_context['username']
889	908
890		-	if args.csv:
891		-	save_csv_report(username, results, result_path+"csv")
	909	+	if args.html:
	910	+	filename = report_filepath_tpl.format(username=username, postfix='.html')
	911	+	save_html_report(filename, report_context)
	912	+	print(f'HTML report on all usernames saved in {filename}')
892	913
893		-	pathPDF = None
894		-	pathHTML = None
895		-	if args.html:
896		-	pathHTML = result_path+"html"
897		-	if args.pdf:
898		-	pathPDF = result_path+"pdf"
	914	+	if args.pdf:
	915	+	filename = report_filepath_tpl.format(username=username, postfix='.pdf')
	916	+	save_pdf_report(filename, report_context)
	917	+	print(f'PDF report on all usernames saved in {filename}')
899	918
900		-	if pathPDF or pathHTML:
901		-	save_html_pdf_report(general_results,pathHTML,pathPDF)
902	919
	920	+	# update database
903	921		db.save_to_file(args.json_file)
904	922
905	923
		skipped 10 lines

■ ■ ■ ■ ■ ■

maigret/notify.py

		skipped 3 lines
4	4		results of queries.
5	5		"""
6	6		import sys
7		-
8	7		from colorama import Fore, Style, init
	8	+
9	9		from .result import QueryStatus
10	10
11	11
		skipped 276 lines

■ ■ ■ ■ ■ ■

maigret/report.py

1	1		import csv
2		-	from datetime import datetime
	2	+	import io
3	3		import logging
4	4		import os
	5	+	import pycountry
5	6		import xmind
6		-	import io
7		-
8		-	from xhtml2pdf import pisa
	7	+	from datetime import datetime
9	8		from jinja2 import Template
10		-
11		-	import pycountry
	9	+	from xhtml2pdf import pisa
	10	+	from dateutil.parser import parse as parse_datetime_str
12	11
13	12		from .result import QueryStatus
14	13		from .utils import is_country_tag, CaseConverter, enrich_link_str
15	14
16		-	def save_csv_report(username: str, results: dict, filename:str):
17		-	with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
18		-	save_csv_report_to_file(username, results, csvfile)
19	15
20		-	def retrive_timestamp(datestring:str):
21		-	first_seen_format = '%Y-%m-%d %H:%M:%S'
22		-	first_seen_formats = '%Y-%m-%dT%H:%M:%S'
23		-	try:
24		-	time = datetime.strptime(datestring, first_seen_format)
25		-	except:
26		-	try:
27		-	time = datetime.strptime(datestring, first_seen_formats)
28		-	except:
29		-	time = datetime.min
30		-	return time
31		-
32		-	def filterSupposedData(data):
	16	+	'''
	17	+	UTILS
	18	+	'''
	19	+	def filter_supposed_data(data):
33	20		### interesting fields
34		-	allowed_fields = ['fullname', 'gender', 'location']
	21	+	allowed_fields = ['fullname', 'gender', 'location', 'age']
35	22		filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0]
36	23		for k, v in data.items()
37	24		if k in allowed_fields}
38	25		return filtered_supposed_data
39	26
40		-	def generate_template(pdf:bool):
41		-	# template generation
42		-	if(pdf):
43		-	template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
44		-	"resources/simple_report_pdf.tpl")).read()
	27	+
	28	+	'''
	29	+	REPORTS SAVING
	30	+	'''
	31	+	def save_csv_report(filename: str, username: str, results: dict):
	32	+	with open(filename, 'w', newline='', encoding='utf-8') as f:
	33	+	generate_csv_report(username, results, f)
	34	+
	35	+
	36	+	def save_txt_report(filename: str, username: str, results: dict):
	37	+	with open(filename, 'w', encoding='utf-8') as f:
	38	+	generate_txt_report(username, results, f)
	39	+
	40	+
	41	+	def save_html_report(filename: str, context: dict):
	42	+	template, _ = generate_report_template(is_pdf=False)
	43	+	filled_template = template.render(**context)
	44	+	with open(filename, 'w') as f:
	45	+	f.write(filled_template)
	46	+
	47	+
	48	+	def save_pdf_report(filename: str, context: dict):
	49	+	template, css = generate_report_template(is_pdf=True)
	50	+	filled_template = template.render(**context)
	51	+	with open(filename, 'w+b') as f:
	52	+	pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
	53	+
	54	+
	55	+	'''
	56	+	REPORTS GENERATING
	57	+	'''
	58	+	def generate_report_template(is_pdf: bool):
	59	+	"""
	60	+	HTML/PDF template generation
	61	+	"""
	62	+	def get_resource_content(filename):
	63	+	return open(os.path.join(maigret_path, 'resources', filename)).read()
	64	+
	65	+	maigret_path = os.path.dirname(os.path.realpath(__file__))
	66	+
	67	+	if is_pdf:
	68	+	template_content = get_resource_content('simple_report_pdf.tpl')
	69	+	css_content = get_resource_content('simple_report_pdf.css')
45	70		else:
46		-	template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
47		-	"resources/simple_report.tpl")).read()
48		-	template = Template(template_text)
	71	+	template_content = get_resource_content('simple_report.tpl')
	72	+	css_content = None
	73	+
	74	+	template = Template(template_content)
49	75		template.globals['title'] = CaseConverter.snake_to_title
50	76		template.globals['detect_link'] = enrich_link_str
51		-	return template
	77	+	return template, css_content
52	78
53		-	def save_html_pdf_report(username_results: list, filename:str=None, filenamepdf:str=None):
	79	+
	80	+	def generate_report_context(username_results: list):
54	81		brief_text = []
55	82		usernames = {}
56	83		extended_info_count = 0
		skipped 27 lines
84	111		if first_seen is None:
85	112		first_seen = created_at
86	113		else:
87		-	known_time = retrive_timestamp(first_seen)
88		-	new_time = retrive_timestamp(created_at)
89		-	if new_time < known_time:
90		-	first_seen = created_at
	114	+	try:
	115	+	known_time = parse_datetime_str(first_seen)
	116	+	new_time = parse_datetime_str(created_at)
	117	+	if new_time < known_time:
	118	+	first_seen = created_at
	119	+	except:
	120	+	logging.debug('Problems with converting datetime %s/%s', first_seen, created_at)
91	121
92	122		for k, v in status.ids_data.items():
93	123		# suppose target data
		skipped 55 lines
149	179		countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items()))
150	180		interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items()))
151	181
152		-	filtered_supposed_data = filterSupposedData(supposed_data)
153		-
154		-	# save report in HTML
155		-	if(filename is not None):
156		-	template = generate_template(False)
157		-	filled_template = template.render(username=first_username,
158		-	brief=brief,
159		-	results=username_results,
160		-	first_seen=first_seen,
161		-	interests_tuple_list=tuple_sort(interests_list),
162		-	countries_tuple_list=tuple_sort(countries_lists),
163		-	supposed_data=filtered_supposed_data,
164		-	generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
165		-	)
166		-	with open(filename, 'w') as f:
167		-	f.write(filled_template)
168		-	f.close()
169		-	# save report in PDF
170		-	if(filenamepdf is not None):
171		-	template = generate_template(True)
172		-	filled_template = template.render(username=first_username,
173		-	brief=brief,
174		-	results=username_results,
175		-	first_seen=first_seen,
176		-	interests_tuple_list=tuple_sort(interests_list),
177		-	countries_tuple_list=tuple_sort(countries_lists),
178		-	supposed_data=filtered_supposed_data,
179		-	generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
180		-	)
181		-	csstext = ""
182		-	with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
183		-	"resources/simple_report_pdf.css"), "r") as cssfile:
184		-	cssline = cssfile.readline()
185		-	csstext += cssline
186		-	while cssline:
187		-	cssline = cssfile.readline()
188		-	csstext += cssline
189		-	cssfile.close()
	182	+	filtered_supposed_data = filter_supposed_data(supposed_data)
190	183
191		-	pdffile = open(filenamepdf, "w+b")
192		-	pisa.pisaDocument(io.StringIO(filled_template), dest=pdffile, default_css=csstext)
193		-	pdffile.close()
	184	+	return {
	185	+	'username': first_username,
	186	+	'brief': brief,
	187	+	'results': username_results,
	188	+	'first_seen': first_seen,
	189	+	'interests_tuple_list': tuple_sort(interests_list),
	190	+	'countries_tuple_list': tuple_sort(countries_lists),
	191	+	'supposed_data': filtered_supposed_data,
	192	+	'generated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
	193	+	}
194	194
195	195
196		-	def save_csv_report_to_file(username: str, results: dict, csvfile):
197		-	print(results)
	196	+	def generate_csv_report(username: str, results: dict, csvfile):
198	197		writer = csv.writer(csvfile)
199	198		writer.writerow(['username',
200	199		'name',
		skipped 12 lines
213	212		results[site]['http_status'],
214	213		])
215	214
	215	+
	216	+	def generate_txt_report(username: str, results: dict, file):
	217	+	exists_counter = 0
	218	+	for website_name in results:
	219	+	dictionary = results[website_name]
	220	+	# TODO: fix no site data issue
	221	+	if not dictionary:
	222	+	continue
	223	+	if dictionary.get("status").status == QueryStatus.CLAIMED:
	224	+	exists_counter += 1
	225	+	file.write(dictionary["url_user"] + "\n")
	226	+	file.write(f'Total Websites Username Detected On : {exists_counter}')
	227	+
216	228		'''
217	229		XMIND 8 Functions
218	230		'''
219		-	def genxmindfile(filename, username, results):
220		-	print(f'Generating XMIND8 file for username {username}')
	231	+	def save_xmind_report(filename, username, results):
221	232		if os.path.exists(filename):
222	233		os.remove(filename)
223	234		workbook = xmind.load(filename)
		skipped 62 lines
286	297		supposed_data[field].append(currentval)
287	298		currentsublabel.setTitle("%s: %s" % (k, currentval))
288	299		### Add Supposed DATA
289		-	filterede_supposed_data = filterSupposedData(supposed_data)
	300	+	filterede_supposed_data = filter_supposed_data(supposed_data)
290	301		if(len(filterede_supposed_data) >0):
291	302		undefinedsection = root_topic1.addSubTopic()
292	303		undefinedsection.setTitle("SUPPOSED DATA")
		skipped 6 lines

■ ■ ■ ■ ■ ■

maigret/resources/data.json

		skipped 8900 lines
8901	8901		"usernameClaimed": "red",
8902	8902		"usernameUnclaimed": "noonewouldeverusethis7"
8903	8903		},
8904		-	"NameMC (Minecraft.net skins)": {
	8904	+	"NameMC": {
8905	8905		"tags": [
8906	8906		"us"
8907	8907		],
	8908	+	"regexCheck": "^.{3,16}$",
8908	8909		"checkType": "message",
8909		-	"absenceStrs": "Profiles: 0 results",
	8910	+	"presenseStrs": "/profile/",
	8911	+	"absenceStrs": "<div class=\"col-lg-5 order-lg-2\">\n </div>",
8910	8912		"alexaRank": 10151,
8911	8913		"url": "https://namemc.com/profile/{username}",
8912	8914		"urlMain": "https://namemc.com/",
		skipped 4503 lines
13416	13418		"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
13417	13419		"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
13418	13420		"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
13419		-	"x-guest-token": "1349509919867854849"
	13421	+	"x-guest-token": "1350185014307254275"
13420	13422		},
13421	13423		"errors": {
13422	13424		"Bad guest token": "x-guest-token update required"
13423	13425		},
13424		-	"urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D",
13425		-	"checkType": "message",
13426		-	"absenceStrs": "Not found",
13427		-	"alexaRank": 55,
13428	13426		"activation": {
13429	13427		"method": "twitter",
13430	13428		"marks": [
		skipped 3 lines
13434	13432		"src": "guest_token",
13435	13433		"dst": "x-guest-token"
13436	13434		},
	13435	+	"urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D",
	13436	+	"checkType": "message",
	13437	+	"absenceStrs": "Not found",
	13438	+	"alexaRank": 55,
13437	13439		"url": "https://twitter.com/{username}",
13438	13440		"urlMain": "https://www.twitter.com/",
13439	13441		"usernameClaimed": "blue",
		skipped 8853 lines

■ ■ ■ ■ ■ ■

maigret/resources/simple_report.tpl

		skipped 19 lines
20	20		<h4 class="mb-0">
21	21		<a class="blog-header-logo text-dark" href="#">Username search report for {{ username }}</a>
22	22		</h4>
23		-	<small class="text-muted">Generated at {{ generated_at }}</small>
	23	+	<small class="text-muted">Generated by <a href="https://github.com/soxoj/maigret">Maigret</a> at {{ generated_at }}</small>
24	24		</div>
25	25		</div>
26	26		<div class="row-mb">
		skipped 83 lines

■ ■ ■ ■ ■ ■

maigret/resources/simple_report_pdf.tpl

		skipped 10 lines
11	11		<h2 class="mb-0">
12	12		Username search report for {{ username }}
13	13		</h2>
14		-	<small>Generated at {{ generated_at }}</small>
	14	+	<small>Generated by <a href="https://github.com/soxoj/maigret">Maigret</a> at {{ generated_at }}</small>
15	15		</div>
16	16		</div>
	17	+	<br/><br/>
17	18		<div>
18	19		<div>
19	20		<div>
		skipped 21 lines
41	42		</div>
42	43		</div>
43	44		</div>
	45	+	<br/>
44	46		<div>
45	47		<div>
46	48		<div>
		skipped 10 lines
57	59		{% for k, v in data.items() %}
58	60		{% if v.found and not v.is_similar %}
59	61		<split></split>
	62	+	<hr>
60	63		<br/>
61	64		<div class="sitebox" style="margin-top: 20px;" >
62	65		<div>
63	66		<div>
64	67		<table>
65	68		<tr>
66		-	<td style="width:201px;" >
67		-	<img alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
68		-	</td>
69		-	<td style="width:10px;" ></td>
70	69		<td valign="top">
71	70		<div class="textbox" style="padding-top: 10px;" >
72	71		<h3>
		skipped 6 lines
79	78		<a href="{{ v.url_user }}" target="_blank">{{ v.url_user }}</a>
80	79		</p>
81	80		</div>
	81	+	{% if v.ids_data %}
	82	+	<div style="clear:both;"></div>
	83	+	<div style="width:100%">
	84	+	<br/>
	85	+	<h4>Details</h4>
	86	+	<table class="table table-striped;" style="margin-top:5px;">
	87	+	<tbody>
	88	+	{% for k1, v1 in v.ids_data.items() %}
	89	+	{% if k1 != 'image' %}
	90	+	<tr>
	91	+	<th style="width:200px;">{{ title(k1) }}</th>
	92	+	<td>{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 \| join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}</td>
	93	+	</tr>
	94	+	{% endif %}
	95	+
	96	+	{% endfor %}
	97	+	</tbody>
	98	+	</table>
	99	+	</div>
	100	+	{% endif %}
	101	+	</td>
	102	+	<td style="width:201px; position: relative;" valign="top">
	103	+	<img alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
82	104		</td>
83	105		</tr>
84	106		</table>
85		-	{% if v.ids_data %}
86		-	<div style="clear:both;"></div>
87		-	<div style="width:100%">
88		-	<br/>
89		-	<h4>Details</h4>
90		-	<table class="table table-striped;" style="margin-top:5px;">
91		-	<tbody>
92		-	{% for k1, v1 in v.ids_data.items() %}
93		-	{% if k1 != 'image' %}
94		-	<tr>
95		-	<th style="width:100px;">{{ title(k1) }}</th>
96		-	<td>{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 \| join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}</td>
97		-	</tr>
98		-	{% endif %}
99		-
100		-	{% endfor %}
101		-	</tbody>
102		-	</table>
103		-	</div>
104		-	{% endif %}
105	107		</div>
106	108		</div>
107	109		</div>
		skipped 6 lines

■ ■ ■ ■ ■ ■

maigret/sites.py

		skipped 2 lines
3	3		import copy
4	4		import json
5	5		import operator
	6	+	import requests
6	7		import sys
7		-
8		-	import requests
9	8
10	9		from .utils import CaseConverter
11	10
		skipped 248 lines

■ ■ ■ ■ ■ ■

requirements.txt

		skipped 7 lines
8	8		certifi==2020.12.5
9	9		chardet==3.0.4
10	10		colorama==0.4.4
	11	+	python-dateutil==2.8.1
11	12		future==0.18.2
12	13		future-annotations==1.0.0
13	14		html5lib==1.1
		skipped 13 lines
27	28		requests==2.25.1
28	29		requests-futures==1.0.0
29	30		six==1.15.0
30		-	socid-extractor==0.0.2
	31	+	socid-extractor>0.0.2
31	32		soupsieve==2.1
32	33		stem==1.8.0
33	34		torrequest==0.1.0
		skipped 8 lines

■ ■ ■ ■ ■ ■

tests/conftest.py

	1	+	import glob
	2	+	import logging
	3	+	import os
	4	+	import pytest
1	5		from _pytest.mark import Mark
2	6		from mock import Mock
3		-	import os
4		-	import pytest
5	7
6	8		from maigret.sites import MaigretDatabase, MaigretSite
7	9
8		-	JSON_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../maigret/resources/data.json')
	10	+	CUR_PATH = os.path.dirname(os.path.realpath(__file__))
	11	+	JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
9	12		empty_mark = Mark('', [], {})
10	13
11	14
		skipped 5 lines
17	20		items.sort(key=by_slow_marker, reverse=False)
18	21
19	22
	23	+	def get_test_reports_filenames():
	24	+	return glob.glob(os.path.join('report_*'), recursive=False)
	25	+
	26	+
	27	+	def remove_test_reports():
	28	+	reports_list = get_test_reports_filenames()
	29	+	for f in reports_list: os.remove(f)
	30	+	logging.error(f'Removed test reports {reports_list}')
	31	+
	32	+
20	33		@pytest.fixture(scope='session')
21	34		def default_db():
22	35		db = MaigretDatabase().load_from_file(JSON_FILE)
23	36
24	37		return db
25	38
	39	+
	40	+	@pytest.fixture(autouse=True)
	41	+	def reports_autoclean():
	42	+	remove_test_reports()
	43	+	yield
	44	+	remove_test_reports()
	45	+

■ ■ ■ ■ ■ ■

tests/test_activation.py

1	1		"""Maigret activation test functions"""
2		-	from mock import Mock
3	2		import pytest
	3	+	from mock import Mock
4	4
5	5		from maigret.activation import ParsingActivator
6	6
		skipped 11 lines

■ ■ ■ ■ ■ ■

tests/test_maigret.py

1	1		"""Maigret main module test functions"""
2	2		import asyncio
3		-	from mock import Mock
4	3		import pytest
	4	+	from mock import Mock
5	5
	6	+	from maigret.maigret import self_check
6	7		from maigret.sites import MaigretDatabase, MaigretSite
7		-	from maigret.maigret import self_check
8		-
9	8
10	9		EXAMPLE_DB = {
11	10		'engines': {
		skipped 42 lines
54	53		assert db.sites[0].disabled == False
55	54
56	55		loop = asyncio.get_event_loop()
57		-	loop.run_until_complete(self_check(db, db.sites_dict, logger))
	56	+	loop.run_until_complete(self_check(db, db.sites_dict, logger, silent=True))
58	57
59	58		assert db.sites[0].disabled == True
60	59
		skipped 9 lines
70	69		assert db.sites[0].disabled == True
71	70
72	71		loop = asyncio.get_event_loop()
73		-	loop.run_until_complete(self_check(db, db.sites_dict, logger))
	72	+	loop.run_until_complete(self_check(db, db.sites_dict, logger, silent=True))
74	73
75	74		assert db.sites[0].disabled == False
76	75
		skipped 8 lines
85	84		assert db.sites[0].disabled == True
86	85
87	86		loop = asyncio.get_event_loop()
88		-	loop.run_until_complete(self_check(db, db.sites_dict, logger))
	87	+	loop.run_until_complete(self_check(db, db.sites_dict, logger, silent=True))
89	88
90	89		assert db.sites[0].disabled == True
91	90
		skipped 9 lines
101	100		assert db.sites[0].disabled == False
102	101
103	102		loop = asyncio.get_event_loop()
104		-	loop.run_until_complete(self_check(db, db.sites_dict, logger))
	103	+	loop.run_until_complete(self_check(db, db.sites_dict, logger, silent=True))
105	104
106	105		assert db.sites[0].disabled == False
107	106

■ ■ ■ ■ ■ ■

tests/test_report.py

1	1		"""Maigret reports test functions"""
2		-	from io import StringIO
3	2		import copy
4	3		import os
	4	+	from io import StringIO
5	5
6	6		import xmind
	7	+	from jinja2 import Template
7	8
8		-	from maigret.report import save_csv_report_to_file, genxmindfile, save_html_pdf_report
	9	+	from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \
	10	+	save_pdf_report, generate_report_template, generate_report_context
9	11		from maigret.result import QueryResult, QueryStatus
10		-
11	12
12	13		EXAMPLE_RESULTS = {
13	14		'GitHub': {
		skipped 42 lines
56	57		SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
57	58
58	59
59		-	def test_save_csv_report_to_file():
	60	+	def test_generate_report_template():
	61	+	report_template, css = generate_report_template(is_pdf=True)
	62	+
	63	+	assert isinstance(report_template, Template)
	64	+	assert isinstance(css, str)
	65	+
	66	+	report_template, css = generate_report_template(is_pdf=False)
	67	+
	68	+	assert isinstance(report_template, Template)
	69	+	assert css is None
	70	+
	71	+
	72	+	def test_generate_csv_report():
60	73		csvfile = StringIO()
61		-	save_csv_report_to_file('test', EXAMPLE_RESULTS, csvfile)
	74	+	generate_csv_report('test', EXAMPLE_RESULTS, csvfile)
62	75
63	76		csvfile.seek(0)
64	77		data = csvfile.readlines()
65	78
66	79		assert data == [
67		-	'username,name,url_main,url_user,exists,http_status\r\n',
68		-	'test,GitHub,https://www.github.com/,https://www.github.com/test,Claimed,200\r\n',
	80	+	'username,name,url_main,url_user,exists,http_status\r\n',
	81	+	'test,GitHub,https://www.github.com/,https://www.github.com/test,Claimed,200\r\n',
	82	+	]
	83	+
	84	+
	85	+	def test_generate_txt_report():
	86	+	txtfile = StringIO()
	87	+	generate_txt_report('test', EXAMPLE_RESULTS, txtfile)
	88	+
	89	+	txtfile.seek(0)
	90	+	data = txtfile.readlines()
	91	+
	92	+	assert data == [
	93	+	'https://www.github.com/test\n',
	94	+	'Total Websites Username Detected On : 1',
69	95		]
70	96
71	97
72	98		def test_save_xmind_report():
73		-	filename = 'test_report.xmind'
74		-	genxmindfile(filename, 'test', EXAMPLE_RESULTS)
	99	+	filename = 'report_test.xmind'
	100	+	save_xmind_report(filename, 'test', EXAMPLE_RESULTS)
75	101
76	102		workbook = xmind.load(filename)
77	103		sheet = workbook.getPrimarySheet()
		skipped 9 lines
87	113
88	114
89	115		def test_html_report():
90		-	report_name = 'report_alexaimephotographycars.html'
91		-	try:
92		-	os.remove(report_name)
93		-	except:
94		-	pass
95		-
96		-	save_html_pdf_report(TEST,filename=report_name,filenamepdf=None)
97		-	assert os.path.exists(report_name)
	116	+	report_name = 'report_test.html'
	117	+	context = generate_report_context(TEST)
	118	+	save_html_report(report_name, context)
98	119
99	120		report_text = open(report_name).read()
100	121
		skipped 1 lines
102	123		assert SUPPOSED_GEO in report_text
103	124		assert SUPPOSED_INTERESTS in report_text
104	125
	126	+
105	127		def test_pdf_report():
106		-	report_name_pdf = 'report_alexaimephotographycars.pdf'
107		-	try:
108		-	os.remove(report_name_pdf)
109		-	except:
110		-	pass
	128	+	report_name = 'report_test.pdf'
	129	+	context = generate_report_context(TEST)
	130	+	save_pdf_report(report_name, context)
111	131
112		-	save_html_pdf_report(TEST,filename=None,filenamepdf=report_name_pdf)
113		-	assert os.path.exists(report_name_pdf)
	132	+	assert os.path.exists(report_name)
114	133

Merge pull request #28 from soxoj/reports