maigret · Commit 0e9655c4
  • Improve extracting ids from URLs, tests

  • Soxoj committed 3 years ago
    0e9655c4
    1 parent 009d51c3
  • .gitignore
    skipped 21 lines
    22 22  # Comma-Separated Values (CSV) Reports
    23 23  *.csv
    24 24   
    25  -# Excluded sites list
    26  -tests/.excluded_sites
    27  - 
    28 25  # MacOS Folder Metadata File
    29 26  .DS_Store
    30 27  /reports/
    skipped 2 lines
    33 30  .coverage
    34 31  dist/
    35 32  htmlcov/
    36  -test_*
     33 +/test_*
  • maigret/maigret.py
    skipped 59 lines
    60 60   )
    61 61   
    62 62   
     63 +def extract_ids_from_url(url: str, db: MaigretDatabase) -> dict:
     64 +    results = {}
     65 +    for s in db.sites:
     66 +        result = s.extract_id_from_url(url)
     67 +        if not result:
     68 +            continue
     69 +        _id, _type = result
     70 +        results[_id] = _type
     71 +    return results
     72 + 
     73 + 
    63 74  def extract_ids_from_page(url, logger, timeout=5) -> dict:
    64 75      results = {}
    65 76      # url, headers
    skipped 39 lines
    105 116              ids_results[u] = utype
    106 117  
    107 118          for url in dictionary.get('ids_links', []):
    108     -            for s in db.sites:
    109     -                u = s.detect_username(url)
    110     -                if u:
    111     -                    ids_results[u] = 'username'
        119 +            ids_results.update(extract_ids_from_url(url, db))
        120 +
    112 121      return ids_results
    113 122   
    114 123   
    skipped 14 lines
    129 138      )
    130 139      parser.add_argument(
    131 140          "username",
    132     -        nargs='?',
        141 +        nargs='*',
    133 142          metavar="USERNAMES",
    134     -        action="append",
    135     -        help="One or more usernames to check with social networks.",
        143 +        help="One or more usernames to search by.",
    136 144      )
    137 145      parser.add_argument(
    138 146          "--version",
    skipped 92 lines
    231 239          help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    232 240      )
    233 241  
    234     -    filter_group = parser.add_argument_group('Site filtering', 'Options to set site search scope')
        242 +    filter_group = parser.add_argument_group(
        243 +        'Site filtering', 'Options to set site search scope'
        244 +    )
    235 245      filter_group.add_argument(
    236 246          "-a",
    237 247          "--all-sites",
    skipped 31 lines
    269 279      modes_group = parser.add_argument_group(
    270 280          'Operating modes',
    271 281          'Various functions except the default search by a username. '
    272     -        'Modes are executed sequentially in the order of declaration.'
        282 +        'Modes are executed sequentially in the order of declaration.',
    273 283      )
    274 284      modes_group.add_argument(
    275 285          "--parse",
    skipped 20 lines
    296 306          "--stats",
    297 307          action="store_true",
    298 308          default=False,
    299     -        help="Show database statistics (most frequent sites engines and tags)."
        309 +        help="Show database statistics (most frequent sites engines and tags).",
    300 310      )
    301 311   
    302     -    output_group = parser.add_argument_group('Output options', 'Options to change verbosity and view of the console output')
        312 +    output_group = parser.add_argument_group(
        313 +        'Output options', 'Options to change verbosity and view of the console output'
        314 +    )
    303 315      output_group.add_argument(
    304 316          "--print-not-found",
    305 317          action="store_true",
    skipped 48 lines
    354 366          help="Don't show progressbar.",
    355 367      )
    356 368  
    357     -    report_group = parser.add_argument_group('Report formats', 'Supported formats of report files')
        369 +    report_group = parser.add_argument_group(
        370 +        'Report formats', 'Supported formats of report files'
        371 +    )
    358 372      report_group.add_argument(
    359 373          "-T",
    360 374          "--txt",
    skipped 85 lines
    446 460          print("Using the proxy: " + args.proxy)
    447 461  
    448 462      if args.parse_url:
    449     -        extracted_ids = extract_ids_from_page(args.parse_url, logger, timeout=args.timeout)
        463 +        extracted_ids = extract_ids_from_page(
        464 +            args.parse_url, logger, timeout=args.timeout
        465 +        )
    450 466          usernames.update(extracted_ids)
    451 467  
    452 468      if args.tags:
    skipped 194 lines
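Note: the username argument change above (nargs='?' plus action="append" → nargs='*') alters what an empty invocation yields. A minimal standalone argparse sketch (a hypothetical parser, not maigret's own) of the difference:

    import argparse

    # old combination: an optional positional plus append leaves [None]
    # in the namespace when no username is passed
    old = argparse.ArgumentParser()
    old.add_argument("username", nargs='?', action="append")
    assert old.parse_args([]).username == [None]
    assert old.parse_args(["alice"]).username == ['alice']

    # new style: nargs='*' collects all positionals into one flat list,
    # empty when nothing is passed
    new = argparse.ArgumentParser()
    new.add_argument("username", nargs='*')
    assert new.parse_args([]).username == []
    assert new.parse_args(["alice", "bob"]).username == ['alice', 'bob']

This is the same default-value shift visible in tests/test_cli.py below ('username': [None] → []), and it is what lets several usernames be passed in one run.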
  • maigret/notify.py
    skipped 281 lines
    282 282          sys.stdout.write("\x1b[1K\r")
    283 283          print(notify)
    284 284  
        285 +        return notify
        286 +
    285 287      def __str__(self):
    286 288          """Convert Object To String.
    287 289  
    skipped 10 lines
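Note: update() now returns the rendered message as well as printing it, which is what the new assertions in tests/test_notify.py below rely on. A simplified sketch of the pattern (a stand-in class, not the actual QueryNotifyPrint internals):

    class Notifier:
        """Build the message, print it, and also return it
        so callers and tests can inspect the output directly."""

        def update(self, site_name: str, url: str) -> str:
            notify = f"[+] {site_name}: {url}"
            print(notify)
            return notify

    # a test can assert on the return value instead of capturing stdout
    assert Notifier().update("TEST_SITE", "http://example.com/test") == \
        "[+] TEST_SITE: http://example.com/test"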
  • maigret/resources/data.json
    skipped 14364 lines
    14365 14365        "ru"
    14366 14366      ],
    14367 14367      "checkType": "response_url",
          14368 +    "regexCheck": "^(?!id\\d)\\w*$",
    14368 14369      "alexaRank": 27,
    14369 14370      "urlMain": "https://vk.com/",
    14370 14371      "url": "https://vk.com/{username}",
    skipped 8 lines
    14379 14380      "checkType": "response_url",
    14380 14381      "alexaRank": 27,
    14381 14382      "urlMain": "https://vk.com/",
          14383 +    "regexCheck": "^\\d+$",
    14382 14384      "url": "https://vk.com/id{username}",
    14383 14385      "source": "VK",
    14384 14386      "usernameClaimed": "270433952",
    skipped 11815 lines
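Note: the two VK entries now carry complementary regexCheck patterns: the vk.com/{username} entry rejects handles that merely look like numeric profile ids, while the vk.com/id{username} entry accepts digits only. A quick standalone check (patterns copied from the diff; the JSON escape \\d is \d in the compiled regex):

    import re

    username_re = re.compile(r"^(?!id\d)\w*$")  # vk.com/{username} entry
    id_re = re.compile(r"^\d+$")                # vk.com/id{username} entry

    assert username_re.match("ida123")      # ordinary handle: allowed
    assert not username_re.match("id123")   # looks like /id<digits>: rejected
    assert id_re.match("270433952")         # pure numeric id: allowed
    assert not id_re.match("ida123")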
  • maigret/sites.py
    skipped 2 lines
    3 3  import copy
    4 4  import json
    5 5  import sys
    6  -from typing import Optional, List, Dict, Any
     6 +from typing import Optional, List, Dict, Any, Tuple
    7 7   
    8 8  import requests
    9 9   
    skipped 135 lines
    145 145              return match_groups.groups()[-1].rstrip("/")
    146 146  
    147 147          return None
        148 +
        149 +    def extract_id_from_url(self, url: str) -> Optional[Tuple[str, str]]:
        150 +        if not self.url_regexp:
        151 +            return None
        152 +
        153 +        match_groups = self.url_regexp.match(url)
        154 +        if not match_groups:
        155 +            return None
        156 +
        157 +        _id = match_groups.groups()[-1].rstrip("/")
        158 +        _type = self.type
        159 +
        160 +        return _id, _type
    148 161  
    149 162      @property
    150 163      def pretty_name(self):
    skipped 303 lines
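Note: extract_id_from_url reuses the same url_regexp that detect_username builds from the site's URL template, but returns the identifier together with the site's declared type ('username', 'vk_id', 'yandex_public_id', ...). A hedged usage sketch, assuming a database loaded from the bundled data.json:

    from maigret.sites import MaigretDatabase

    db = MaigretDatabase().load_from_file("maigret/resources/data.json")

    for site in db.sites:
        result = site.extract_id_from_url("https://vk.com/id270433952")
        if result:
            _id, _type = result
            print(site.name, _id, _type)  # expected: the numeric id, type 'vk_id'
            break

The extract_ids_from_url wrapper added in maigret/maigret.py above runs this same loop over db.sites and merges every (id, type) pair into one dict.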
  • maigret/utils.py
    skipped 54 lines
    55 55          url_main_part = self.extract_main_part(url)
    56 56          for c in self.UNSAFE_SYMBOLS:
    57 57              url_main_part = url_main_part.replace(c, f"\\{c}")
    58     -        username_regexp = username_regexp or ".+?"
        58 +        prepared_username_regexp = (username_regexp or ".+?").lstrip('^').rstrip('$')
    59 59  
    60     -        url_regexp = url_main_part.replace("{username}", f"({username_regexp})")
        60 +        url_regexp = url_main_part.replace(
        61 +            "{username}", f"({prepared_username_regexp})"
        62 +        )
    61 63          regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
    62 64  
    63 65          return re.compile(regexp_str)
    skipped 26 lines
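Note: the lstrip('^')/rstrip('$') step exists because regexCheck patterns are anchored for standalone username validation, yet here they get spliced into the middle of a larger URL regexp, where an embedded ^ or $ can never match. A condensed illustration using the VK id pattern from data.json:

    import re

    username_regexp = r"^\d+$"  # anchored validation pattern, as stored
    prepared = (username_regexp or ".+?").lstrip('^').rstrip('$')

    broken = re.compile(r"^https?://vk\.com/id(" + username_regexp + r")$")
    fixed = re.compile(r"^https?://vk\.com/id(" + prepared + r")$")

    assert broken.match("https://vk.com/id270433952") is None   # inner ^/$ kill the match
    assert fixed.match("https://vk.com/id270433952").group(1) == "270433952"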
  • tests/test_cli.py
    skipped 50 lines
    51 51      assert args == Namespace(**want_args)
    52 52  
    53 53  
        54 +def test_args_search_mode_several_usernames(argparser):
        55 +    args = argparser.parse_args('username1 username2'.split())
        56 +
        57 +    assert args.username == ['username1', 'username2']
        58 +
        59 +    want_args = dict(DEFAULT_ARGS)
        60 +    want_args.update({'username': ['username1', 'username2']})
        61 +
        62 +    assert args == Namespace(**want_args)
        63 +
        64 +
    54 65  def test_args_self_check_mode(argparser):
    55 66      args = argparser.parse_args('--self-check --site GitHub'.split())
    56 67  
    skipped 2 lines
    59 70          {
    60 71              'self_check': True,
    61 72              'site_list': ['GitHub'],
    62     -            'username': [None],
        73 +            'username': [],
    63 74          }
    64 75      )
    65 76  
    skipped 16 lines
  • tests/test_maigret.py
    skipped 4 lines
    5 5  import pytest
    6 6  from mock import Mock
    7 7   
    8  -from maigret.maigret import self_check, maigret, extract_ids_from_page, extract_ids_from_results
     8 +from maigret.maigret import self_check, maigret
     9 +from maigret.maigret import extract_ids_from_page, extract_ids_from_results, extract_ids_from_url
    9 10  from maigret.sites import MaigretSite
    10 11  from maigret.result import QueryResult, QueryStatus
    11 12   
    skipped 125 lines
    137 138      assert results == RESULTS_EXAMPLE
    138 139  
    139 140  
        141 +def test_extract_ids_from_url(default_db):
        142 +    assert extract_ids_from_url('https://www.reddit.com/user/test', default_db) == {'test': 'username'}
        143 +    assert extract_ids_from_url('https://vk.com/id123', default_db) == {'123': 'vk_id'}
        144 +    assert extract_ids_from_url('https://vk.com/ida123', default_db) == {'ida123': 'username'}
        145 +    assert extract_ids_from_url('https://my.mail.ru/yandex.ru/dipres8904/', default_db) == {'dipres8904': 'username'}
        146 +    assert extract_ids_from_url('https://reviews.yandex.ru/user/adbced123', default_db) == {'adbced123': 'yandex_public_id'}
        147 +
        148 +
    140 149  @pytest.mark.slow
    141 150  def test_extract_ids_from_page(test_db):
    142 151      logger = Mock()
    143     -    found_ids = extract_ids_from_page('https://www.reddit.com/user/test', logger)
    144     -    assert found_ids == {'test': 'username'}
        152 +    assert extract_ids_from_page('https://www.reddit.com/user/test', logger) == {'test': 'username'}
    145 153  
    146 154   
    147 155  def test_extract_ids_from_results(test_db):
    skipped 1 lines
    149 157      TEST_EXAMPLE['Reddit']['ids_usernames'] = {'test1': 'yandex_public_id'}
    150 158      TEST_EXAMPLE['Reddit']['ids_links'] = ['https://www.reddit.com/user/test2']
    151 159  
    152     -    found_ids = extract_ids_from_results(TEST_EXAMPLE, test_db)
    153     -    assert found_ids == {'test1': 'yandex_public_id', 'test2': 'username'}
        160 +    assert extract_ids_from_results(TEST_EXAMPLE, test_db) == {'test1': 'yandex_public_id', 'test2': 'username'}
    154 161  
  • tests/test_notify.py
        1 +from maigret.errors import CheckError
        2 +from maigret.notify import QueryNotifyPrint
        3 +from maigret.result import QueryStatus, QueryResult
        4 +
        5 +
        6 +def test_notify_illegal():
        7 +    n = QueryNotifyPrint(color=False)
        8 +
        9 +    assert n.update(QueryResult(
       10 +        username="test",
       11 +        status=QueryStatus.ILLEGAL,
       12 +        site_name="TEST_SITE",
       13 +        site_url_user="http://example.com/test"
       14 +    )) == "[-] TEST_SITE: Illegal Username Format For This Site!"
       15 +
       16 +
       17 +def test_notify_claimed():
       18 +    n = QueryNotifyPrint(color=False)
       19 +
       20 +    assert n.update(QueryResult(
       21 +        username="test",
       22 +        status=QueryStatus.CLAIMED,
       23 +        site_name="TEST_SITE",
       24 +        site_url_user="http://example.com/test"
       25 +    )) == "[+] TEST_SITE: http://example.com/test"
       26 +
       27 +
       28 +def test_notify_available():
       29 +    n = QueryNotifyPrint(color=False)
       30 +
       31 +    assert n.update(QueryResult(
       32 +        username="test",
       33 +        status=QueryStatus.AVAILABLE,
       34 +        site_name="TEST_SITE",
       35 +        site_url_user="http://example.com/test"
       36 +    )) == "[-] TEST_SITE: Not found!"
       37 +
       38 +
       39 +def test_notify_unknown():
       40 +    n = QueryNotifyPrint(color=False)
       41 +    result = QueryResult(
       42 +        username="test",
       43 +        status=QueryStatus.UNKNOWN,
       44 +        site_name="TEST_SITE",
       45 +        site_url_user="http://example.com/test"
       46 +    )
       47 +    result.error = CheckError('Type', 'Reason')
       48 +
       49 +    assert n.update(result) == "[?] TEST_SITE: Type error: Reason"
       50 +
  • tests/test_utils.py
    skipped 67 lines
    68 68      ]
    69 69  
    70 70      url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
        71 +    # combine parts variations
    71 72      for url_parts in itertools.product(*parts):
    72 73          url = ''.join(url_parts)
        74 +        # ensure all combinations give valid main part
    73 75          assert URLMatcher.extract_main_part(url) == url_main_part
    74 76          assert not url_regexp.match(url) is None
    75 77  
    skipped 8 lines
    84 86          ['/', ''],
    85 87      ]
    86 88  
        89 +    # combine parts variations
    87 90      for url_parts in itertools.product(*parts):
    88 91          url = ''.join(url_parts)
        92 +        # ensure all combinations match pattern
    89 93          assert (
    90 94              URLMatcher.make_profile_url_regexp(url).pattern
    91 95              == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
    skipped 39 lines