STRLCPY/maigret

Merge pull request #164 from soxoj/dns-checks-some-fixes

Added some domains for new DNS checker, fixed reports generation crashes

soxoj committed with GitHub 3 years ago

5c8f7a3a

2 parents
5179cb56
13e1b6f4

Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing completes)

Total 8 files Show one by one

■ ■ ■ ■ ■ ■

maigret/checking.py

		skipped 737 lines
738	738
739	739
740	740		async def site_self_check(
741		-	site: MaigretSite, logger, semaphore, db: MaigretDatabase, silent=False, tor_proxy=None
	741	+	site: MaigretSite,
	742	+	logger,
	743	+	semaphore,
	744	+	db: MaigretDatabase,
	745	+	silent=False,
	746	+	tor_proxy=None,
742	747		):
743	748		changes = {
744	749		"disabled": False,
		skipped 67 lines
812	817
813	818
814	819		async def self_check(
815		-	db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10,
816		-	tor_proxy=None
	820	+	db: MaigretDatabase,
	821	+	site_data: dict,
	822	+	logger,
	823	+	silent=False,
	824	+	max_connections=10,
	825	+	tor_proxy=None,
817	826		) -> bool:
818	827		sem = asyncio.Semaphore(max_connections)
819	828		tasks = []
		skipped 32 lines

■ ■ ■ ■ ■ ■

maigret/maigret.py

		skipped 528 lines
529	529		if args.self_check:
530	530		print('Maigret sites database self-checking...')
531	531		is_need_update = await self_check(
532		-	db, site_data, logger, max_connections=args.connections,
533		-	tor_proxy=args.tor_proxy
	532	+	db,
	533	+	site_data,
	534	+	logger,
	535	+	max_connections=args.connections,
	536	+	tor_proxy=args.tor_proxy,
534	537		)
535	538		if is_need_update:
536	539		if input('Do you want to save changes permanently? [Yn]\n').lower() in (
		skipped 162 lines

■ ■ ■ ■ ■ ■

maigret/report.py

		skipped 39 lines
40	40		return dict(
41	41		sorted(
42	42		results.items(),
43		-	key=lambda x: len((x[1].get('status') and x[1]['status'].ids_data or {}).keys()),
	43	+	key=lambda x: len(
	44	+	(x[1].get('status') and x[1]['status'].ids_data or {}).keys()
	45	+	),
44	46		reverse=True,
45	47		)
46	48		)
		skipped 206 lines
253	255		["username", "name", "url_main", "url_user", "exists", "http_status"]
254	256		)
255	257		for site in results:
	258	+	# TODO: fix the reason
	259	+	status = 'Unknown'
	260	+	if "status" in results[site]:
	261	+	status = str(results[site]["status"].status)
256	262		writer.writerow(
257	263		[
258	264		username,
259	265		site,
260		-	results[site]["url_main"],
261		-	results[site]["url_user"],
262		-	str(results[site]["status"].status),
263		-	results[site]["http_status"],
	266	+	results[site].get("url_main", ""),
	267	+	results[site].get("url_user", ""),
	268	+	status,
	269	+	results[site].get("http_status", 0),
264	270		]
265	271		)
266	272
		skipped 5 lines
272	278		# TODO: fix no site data issue
273	279		if not dictionary:
274	280		continue
275		-	if dictionary.get("status").status == QueryStatus.CLAIMED:
	281	+	if (
	282	+	dictionary.get("status")
	283	+	and dictionary["status"].status == QueryStatus.CLAIMED
	284	+	):
276	285		exists_counter += 1
277	286		file.write(dictionary["url_user"] + "\n")
278	287		file.write(f"Total Websites Username Detected On : {exists_counter}")
		skipped 6 lines
285	294		for sitename in results:
286	295		site_result = results[sitename]
287	296		# TODO: fix no site data issue
288		-	if not site_result or site_result.get("status").status != QueryStatus.CLAIMED:
	297	+	if not site_result or not site_result.get("status"):
	298	+	continue
	299	+
	300	+	if site_result["status"].status != QueryStatus.CLAIMED:
289	301		continue
290	302
291	303		data = dict(site_result)
		skipped 53 lines
345	357		if not dictionary:
346	358		continue
347	359		result_status = dictionary.get("status")
	360	+	# TODO: fix the reason
348	361		if not result_status or result_status.status != QueryStatus.CLAIMED:
349	362		continue
350	363
		skipped 36 lines

■ ■ ■ ■ ■ ■ ■

maigret/resources/data.json

		skipped 13023 lines
13024	13024		"us"
13025	13025		],
13026	13026		"headers": {
13027		-	"authorization": "Bearer BQBKzy1QSQQO4wR2vRVROUOaj8T9gr0Vkjup9wUkLh0MZDtMEVZ0WEtyoZ_tTc4utIhyvvn9V7URwVWGeuU"
	13027	+	"authorization": "Bearer BQDEpoSTjg2Ko86QUHZjJmZvp5AuI1ru6rJySe8_cD0bRqMZk6PfmdsmJBu3QeiNHgUPGQPDz2VeSvRr16w"
13028	13028		},
13029	13029		"errors": {
13030	13030		"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
		skipped 1419 lines
14450	14450		"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
14451	14451		"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
14452	14452		"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
14453		-	"x-guest-token": "1397644352072163331"
	14453	+	"x-guest-token": "1400174453577900043"
14454	14454		},
14455	14455		"errors": {
14456	14456		"Bad guest token": "x-guest-token update required"
		skipped 400 lines
14857	14857		"video"
14858	14858		],
14859	14859		"headers": {
14860		-	"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjIwNjAyODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.RBYc81QRYfs9m7yzcGkUXhyA3rGPhQJaoAG8dnt61I4"
	14860	+	"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjI2NjM1MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.bKcisdrE5nJZMvrbagUC8lZQOs9spg3IKMlK15IclM4"
14861	14861		},
14862	14862		"activation": {
14863	14863		"url": "https://vimeo.com/_rv/viewer",
		skipped 12916 lines
27780	27780		"url": "{username}.com",
27781	27781		"urlMain": "{username}.com",
27782	27782		"usernameClaimed": "soxoj",
	27783	+	"usernameUnclaimed": "noonewouldeverusethis7",
	27784	+	"checkType": "status_code"
	27785	+	},
	27786	+	".pro": {
	27787	+	"protocol": "dns",
	27788	+	"url": "{username}.pro",
	27789	+	"urlMain": "{username}.pro",
	27790	+	"usernameClaimed": "alex",
	27791	+	"usernameUnclaimed": "noonewouldeverusethis7",
	27792	+	"checkType": "status_code"
	27793	+	},
	27794	+	".me": {
	27795	+	"protocol": "dns",
	27796	+	"url": "{username}.me",
	27797	+	"urlMain": "{username}.me",
	27798	+	"usernameClaimed": "alex",
	27799	+	"usernameUnclaimed": "noonewouldeverusethis7",
	27800	+	"checkType": "status_code"
	27801	+	},
	27802	+	".biz": {
	27803	+	"protocol": "dns",
	27804	+	"url": "{username}.biz",
	27805	+	"urlMain": "{username}.biz",
	27806	+	"usernameClaimed": "alex",
	27807	+	"usernameUnclaimed": "noonewouldeverusethis7",
	27808	+	"checkType": "status_code"
	27809	+	},
	27810	+	".email": {
	27811	+	"protocol": "dns",
	27812	+	"url": "{username}.email",
	27813	+	"urlMain": "{username}.email",
	27814	+	"usernameClaimed": "alex",
	27815	+	"usernameUnclaimed": "noonewouldeverusethis7",
	27816	+	"checkType": "status_code"
	27817	+	},
	27818	+	".guru": {
	27819	+	"protocol": "dns",
	27820	+	"url": "{username}.guru",
	27821	+	"urlMain": "{username}.guru",
	27822	+	"usernameClaimed": "alex",
	27823	+	"usernameUnclaimed": "noonewouldeverusethis7",
	27824	+	"checkType": "status_code"
	27825	+	},
	27826	+	".ddns.net": {
	27827	+	"protocol": "dns",
	27828	+	"url": "{username}.ddns.net",
	27829	+	"urlMain": "{username}.ddns.net",
	27830	+	"usernameClaimed": "repack",
27783	27831		"usernameUnclaimed": "noonewouldeverusethis7",
27784	27832		"checkType": "status_code"
27785	27833		},
		skipped 606 lines

■ ■ ■ ■ ■ ■

maigret/resources/simple_report.tpl

		skipped 67 lines
68	68		<div class="row-mb">
69	69		<div class="col-md">
70	70		<div class="card flex-md-row mb-4 box-shadow h-md-250">
71		-	<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
	71	+	<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status and v.status.ids_data and v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
72	72		<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
73	73		<h3 class="mb-0" style="padding-top: 1rem;">
74	74		<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
		skipped 35 lines

■ ■ ■ ■ ■ ■

maigret/sites.py

		skipped 303 lines
304	304		lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
305	305		)
306	306		is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
	307	+	is_protocol_in_tags = lambda x: x.protocol and x.protocol in normalized_tags
307	308		is_disabled_needed = lambda x: not x.disabled or (
308	309		"disabled" in tags or disabled
309	310		)
310	311		is_id_type_ok = lambda x: x.type == id_type
311	312
312		-	filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
	313	+	filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x) or is_protocol_in_tags(x)
313	314		filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
314	315
315	316		filter_fun = (
		skipped 174 lines

■ ■ ■ ■ ■ ■

maigret/submit.py

		skipped 208 lines
209	209		):
210	210		custom_headers = {}
211	211		while True:
212		-	header_key = input('Specify custom header if you need or just press Enter to skip. Header name: ')
	212	+	header_key = input(
	213	+	'Specify custom header if you need or just press Enter to skip. Header name: '
	214	+	)
213	215		if not header_key:
214	216		break
215	217		header_value = input('Header value: ')
		skipped 173 lines

■ ■ ■ ■ ■ ■ ■

tests/test_report.py

		skipped 44 lines
45	45		}
46	46		}
47	47
	48	+	BROKEN_RESULTS = {
	49	+	'GitHub': {
	50	+	'username': 'test',
	51	+	'parsing_enabled': True,
	52	+	'url_main': 'https://www.github.com/',
	53	+	'url_user': 'https://www.github.com/test',
	54	+	'http_status': 200,
	55	+	'is_similar': False,
	56	+	'rank': 78,
	57	+	'site': MaigretSite('test', {}),
	58	+	}
	59	+	}
	60	+
48	61		GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
49	62		GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
50	63		GOOD_500PX_RESULT.ids_data = {
		skipped 188 lines
239	252		]
240	253
241	254		SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
	255	+	SUPPOSED_BROKEN_BRIEF = """Search by username alexaimephotographycars returned 0 accounts. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 2 accounts."""
	256	+
	257	+	SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
	258	+	SUPPOSED_BROKEN_GEO = "Geo: us <span class=\"text-muted\">(2)</span>"
242	259
243	260		SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
244		-
245		-	SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
	261	+	SUPPOSED_BROKEN_INTERESTS = "Interests: news <span class=\"text-muted\">(1)</span>, photo <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
246	262
247	263
248	264		def test_generate_report_template():
		skipped 21 lines
270	286		]
271	287
272	288
	289	+	def test_generate_csv_report_broken():
	290	+	csvfile = StringIO()
	291	+	generate_csv_report('test', BROKEN_RESULTS, csvfile)
	292	+
	293	+	csvfile.seek(0)
	294	+	data = csvfile.readlines()
	295	+
	296	+	assert data == [
	297	+	'username,name,url_main,url_user,exists,http_status\r\n',
	298	+	'test,GitHub,https://www.github.com/,https://www.github.com/test,Unknown,200\r\n',
	299	+	]
	300	+
	301	+
273	302		def test_generate_txt_report():
274	303		txtfile = StringIO()
275	304		generate_txt_report('test', EXAMPLE_RESULTS, txtfile)
		skipped 7 lines
283	312		]
284	313
285	314
	315	+	def test_generate_txt_report_broken():
	316	+	txtfile = StringIO()
	317	+	generate_txt_report('test', BROKEN_RESULTS, txtfile)
	318	+
	319	+	txtfile.seek(0)
	320	+	data = txtfile.readlines()
	321	+
	322	+	assert data == [
	323	+	'Total Websites Username Detected On : 0',
	324	+	]
	325	+
	326	+
286	327		def test_generate_json_simple_report():
287	328		jsonfile = StringIO()
288	329		MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
		skipped 7 lines
296	337		assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2']
297	338
298	339
	340	+	def test_generate_json_simple_report_broken():
	341	+	jsonfile = StringIO()
	342	+	MODIFIED_RESULTS = dict(BROKEN_RESULTS)
	343	+	MODIFIED_RESULTS['GitHub2'] = BROKEN_RESULTS['GitHub']
	344	+	generate_json_report('test', BROKEN_RESULTS, jsonfile, 'simple')
	345	+
	346	+	jsonfile.seek(0)
	347	+	data = jsonfile.readlines()
	348	+
	349	+	assert len(data) == 1
	350	+	assert list(json.loads(data[0]).keys()) == []
	351	+
	352	+
299	353		def test_generate_json_ndjson_report():
300	354		jsonfile = StringIO()
301	355		MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
		skipped 27 lines
329	383		)
330	384
331	385
	386	+	def test_save_xmind_report_broken():
	387	+	filename = 'report_test.xmind'
	388	+	save_xmind_report(filename, 'test', BROKEN_RESULTS)
	389	+
	390	+	workbook = xmind.load(filename)
	391	+	sheet = workbook.getPrimarySheet()
	392	+	data = sheet.getData()
	393	+
	394	+	assert data['title'] == 'test Analysis'
	395	+	assert data['topic']['title'] == 'test'
	396	+	assert len(data['topic']['topics']) == 1
	397	+	assert data['topic']['topics'][0]['title'] == 'Undefined'
	398	+
	399	+
332	400		def test_html_report():
333	401		report_name = 'report_test.html'
334	402		context = generate_report_context(TEST)
		skipped 6 lines
341	409		assert SUPPOSED_INTERESTS in report_text
342	410
343	411
	412	+	def test_html_report_broken():
	413	+	report_name = 'report_test_broken.html'
	414	+	BROKEN_DATA = copy.deepcopy(TEST)
	415	+	BROKEN_DATA[0][2]['500px']['status'] = None
	416	+
	417	+	context = generate_report_context(BROKEN_DATA)
	418	+	save_html_report(report_name, context)
	419	+
	420	+	report_text = open(report_name).read()
	421	+
	422	+	assert SUPPOSED_BROKEN_BRIEF in report_text
	423	+	assert SUPPOSED_BROKEN_GEO in report_text
	424	+	assert SUPPOSED_BROKEN_INTERESTS in report_text
	425	+
	426	+
344	427		def test_pdf_report():
345	428		report_name = 'report_test.pdf'
346	429		context = generate_report_context(TEST)
		skipped 11 lines
358	441		assert 'us' in report_text
359	442		assert 'photo' in report_text
360	443
	444	+
	445	+	def test_text_report_broken():
	446	+	BROKEN_DATA = copy.deepcopy(TEST)
	447	+	BROKEN_DATA[0][2]['500px']['status'] = None
	448	+
	449	+	context = generate_report_context(BROKEN_DATA)
	450	+	report_text = get_plaintext_report(context)
	451	+
	452	+	for brief_part in SUPPOSED_BROKEN_BRIEF.split():
	453	+	assert brief_part in report_text
	454	+	assert 'us' in report_text
	455	+	assert 'photo' in report_text
	456	+