  • Refactoring and linting; added notifications about frequent search errors

  • Soxoj committed 3 years ago
    bfa6afac
    1 parent bfaf276f
  • format.sh
     1 +#!/bin/sh
     2 +FILES="maigret wizard.py maigret.py"
     3 + 
     4 +echo 'black'
     5 +black --skip-string-normalization $FILES
  • lint.sh
     1 +#!/bin/sh
     2 +FILES="maigret wizard.py maigret.py"
     3 + 
     4 +echo 'syntax errors or undefined names'
     5 +flake8 --count --select=E9,F63,F7,F82 --show-source --statistics $FILES
     6 + 
     7 +echo 'warning'
     8 +flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503 $FILES
     9 + 
     10 +echo 'mypy'
     11 +mypy ./maigret
  • maigret/__init__.py
    skipped 2 lines
    3 3  from .checking import maigret as search
    4 4  from .sites import MaigretEngine, MaigretSite, MaigretDatabase
    5 5  from .notify import QueryNotifyPrint as Notifier
     6 + 
  • maigret/activation.py
    skipped 8 lines
    9 9   @staticmethod
    10 10   def twitter(site, logger, cookies={}):
    11 11   headers = dict(site.headers)
    12  - del headers['x-guest-token']
    13  - r = requests.post(site.activation['url'], headers=headers)
     12 + del headers["x-guest-token"]
     13 + r = requests.post(site.activation["url"], headers=headers)
    14 14   logger.info(r)
    15 15   j = r.json()
    16  - guest_token = j[site.activation['src']]
    17  - site.headers['x-guest-token'] = guest_token
     16 + guest_token = j[site.activation["src"]]
     17 + site.headers["x-guest-token"] = guest_token
    18 18   
    19 19   @staticmethod
    20 20   def vimeo(site, logger, cookies={}):
    21 21   headers = dict(site.headers)
    22  - if 'Authorization' in headers:
    23  - del headers['Authorization']
    24  - r = requests.get(site.activation['url'], headers=headers)
    25  - jwt_token = r.json()['jwt']
    26  - site.headers['Authorization'] = 'jwt ' + jwt_token
     22 + if "Authorization" in headers:
     23 + del headers["Authorization"]
     24 + r = requests.get(site.activation["url"], headers=headers)
     25 + jwt_token = r.json()["jwt"]
     26 + site.headers["Authorization"] = "jwt " + jwt_token
    27 27   
    28 28   @staticmethod
    29 29   def spotify(site, logger, cookies={}):
    30 30   headers = dict(site.headers)
    31  - if 'Authorization' in headers:
    32  - del headers['Authorization']
    33  - r = requests.get(site.activation['url'])
    34  - bearer_token = r.json()['accessToken']
    35  - site.headers['authorization'] = f'Bearer {bearer_token}'
     31 + if "Authorization" in headers:
     32 + del headers["Authorization"]
     33 + r = requests.get(site.activation["url"])
     34 + bearer_token = r.json()["accessToken"]
     35 + site.headers["authorization"] = f"Bearer {bearer_token}"
    36 36   
    37 37   @staticmethod
    38 38   def xssis(site, logger, cookies={}):
    39 39   if not cookies:
    40  - logger.debug('You must have cookies to activate xss.is parsing!')
     40 + logger.debug("You must have cookies to activate xss.is parsing!")
    41 41   return
    42 42   
    43 43   headers = dict(site.headers)
    44 44   post_data = {
    45  - '_xfResponseType': 'json',
    46  - '_xfToken': '1611177919,a2710362e45dad9aa1da381e21941a38'
     45 + "_xfResponseType": "json",
     46 + "_xfToken": "1611177919,a2710362e45dad9aa1da381e21941a38",
    47 47   }
    48  - headers['content-type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
    49  - r = requests.post(site.activation['url'], headers=headers, cookies=cookies, data=post_data)
    50  - csrf = r.json()['csrf']
    51  - site.get_params['_xfToken'] = csrf
     48 + headers["content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"
     49 + r = requests.post(
     50 + site.activation["url"], headers=headers, cookies=cookies, data=post_data
     51 + )
     52 + csrf = r.json()["csrf"]
     53 + site.get_params["_xfToken"] = csrf
    52 54   
    53 55   
    54 56  async def import_aiohttp_cookies(cookiestxt_filename):
    skipped 7 lines
    62 64   for key, cookie in list(domain.values())[0].items():
    63 65   c = Morsel()
    64 66   c.set(key, cookie.value, cookie.value)
    65  - c['domain'] = cookie.domain
    66  - c['path'] = cookie.path
     67 + c["domain"] = cookie.domain
     68 + c["path"] = cookie.path
    67 69   cookies_list.append((key, c))
    68 70   
    69 71   cookies.update_cookies(cookies_list)
    skipped 3 lines
  • maigret/checking.py
    skipped 4 lines
    5 5  import ssl
    6 6  import sys
    7 7  import tqdm
    8  -import time
     8 +from typing import Tuple, Optional
    9 9   
    10 10  import aiohttp
    11 11  import tqdm.asyncio
    12 12  from aiohttp_socks import ProxyConnector
    13  -from mock import Mock
    14 13  from python_socks import _errors as proxy_errors
    15 14  from socid_extractor import extract
    16 15   
    17 16  from .activation import ParsingActivator, import_aiohttp_cookies
     17 +from . import errors
     18 +from .errors import CheckError
    18 19  from .executors import AsyncioSimpleExecutor, AsyncioProgressbarQueueExecutor
    19 20  from .result import QueryResult, QueryStatus
    20 21  from .sites import MaigretDatabase, MaigretSite
    21  -from .types import CheckError
    22 22  from .utils import get_random_user_agent
    23 23   
    24 24   
    25 25  supported_recursive_search_ids = (
    26  - 'yandex_public_id',
    27  - 'gaia_id',
    28  - 'vk_id',
    29  - 'ok_id',
    30  - 'wikimapia_uid',
    31  - 'steam_id',
    32  - 'uidme_uguid',
     26 + "yandex_public_id",
     27 + "gaia_id",
     28 + "vk_id",
     29 + "ok_id",
     30 + "wikimapia_uid",
     31 + "steam_id",
     32 + "uidme_uguid",
    33 33  )
    34 34   
    35  -common_errors = {
    36  - '<title>Attention Required! | Cloudflare</title>': CheckError('Captcha', 'Cloudflare'),
    37  - 'Please stand by, while we are checking your browser': CheckError('Bot protection', 'Cloudflare'),
    38  - '<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
    39  - 'document.getElementById(\'validate_form_submit\').disabled=true': CheckError('Captcha', 'Mail.ru'),
    40  - 'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': CheckError('Bot protection', 'Blazingfast'),
    41  - '404</h1><p class="error-card__description">Мы&nbsp;не&nbsp;нашли страницу': CheckError('Resolving', 'MegaFon 404 page'),
    42  - 'Доступ к информационному ресурсу ограничен на основании Федерального закона': CheckError('Censorship', 'MGTS'),
    43  - 'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
    44  -}
     35 +unsupported_characters = "#"
    45 36   
    46  -unsupported_characters = '#'
    47 37   
    48  - 
    49  -async def get_response(request_future, site_name, logger) -> (str, int, CheckError):
     38 +async def get_response(
     39 + request_future, site_name, logger
     40 +) -> Tuple[str, int, Optional[CheckError]]:
    50 41   html_text = None
    51 42   status_code = 0
    52  - error = CheckError('Error')
     43 + error: Optional[CheckError] = CheckError("Error")
    53 44   
    54 45   try:
    55 46   response = await request_future
    56 47   
    57 48   status_code = response.status
    58 49   response_content = await response.content.read()
    59  - charset = response.charset or 'utf-8'
    60  - decoded_content = response_content.decode(charset, 'ignore')
     50 + charset = response.charset or "utf-8"
     51 + decoded_content = response_content.decode(charset, "ignore")
    61 52   html_text = decoded_content
    62 53   
    63 54   if status_code == 0:
    64  - error = CheckError('Connection lost')
     55 + error = CheckError("Connection lost")
    65 56   else:
    66 57   error = None
    67 58   
    68 59   logger.debug(html_text)
    69 60   
    70 61   except asyncio.TimeoutError as e:
    71  - error = CheckError('Request timeout', str(e))
     62 + error = CheckError("Request timeout", str(e))
    72 63   except aiohttp.client_exceptions.ClientConnectorError as e:
    73  - error = CheckError('Connecting failure', str(e))
     64 + error = CheckError("Connecting failure", str(e))
    74 65   except aiohttp.http_exceptions.BadHttpMessage as e:
    75  - error = CheckError('HTTP', str(e))
     66 + error = CheckError("HTTP", str(e))
    76 67   except proxy_errors.ProxyError as e:
    77  - error = CheckError('Proxy', str(e))
     68 + error = CheckError("Proxy", str(e))
     69 + except KeyboardInterrupt:
     70 + error = CheckError("Interrupted")
    78 71   except Exception as e:
    79 72   # python-specific exceptions
    80 73   if sys.version_info.minor > 6:
    81  - if isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError):
    82  - error = CheckError('SSL', str(e))
     74 + if isinstance(e, ssl.SSLCertVerificationError) or isinstance(
     75 + e, ssl.SSLError
     76 + ):
     77 + error = CheckError("SSL", str(e))
    83 78   else:
    84  - logger.warning(f'Unhandled error while requesting {site_name}: {e}')
     79 + logger.warning(f"Unhandled error while requesting {site_name}: {e}")
    85 80   logger.debug(e, exc_info=True)
    86  - error = CheckError('Error', str(e))
     81 + error = CheckError("Error", str(e))
    87 82   
    88 83   # TODO: return only needed information
    89  - return html_text, status_code, error
     84 + return str(html_text), status_code, error
    90 85   
    91 86   
    92  -async def update_site_dict_from_response(sitename, site_dict, results_info, logger, query_notify):
     87 +async def update_site_dict_from_response(
     88 + sitename, site_dict, results_info, logger, query_notify
     89 +):
    93 90   site_obj = site_dict[sitename]
    94 91   future = site_obj.request_future
    95 92   if not future:
    96 93   # ignore: search by incompatible id type
    97 94   return
    98 95   
    99  - response = await get_response(request_future=future,
    100  - site_name=sitename,
    101  - logger=logger)
     96 + response = await get_response(
     97 + request_future=future, site_name=sitename, logger=logger
     98 + )
    102 99   
    103  - return sitename, process_site_result(response, query_notify, logger, results_info, site_obj)
     100 + return sitename, process_site_result(
     101 + response, query_notify, logger, results_info, site_obj
     102 + )
    104 103   
    105 104   
    106 105  # TODO: move to separate class
    107  -def detect_error_page(html_text, status_code, fail_flags, ignore_403) -> CheckError:
     106 +def detect_error_page(
     107 + html_text, status_code, fail_flags, ignore_403
     108 +) -> Optional[CheckError]:
    108 109   # Detect service restrictions such as a country restriction
    109 110   for flag, msg in fail_flags.items():
    110 111   if flag in html_text:
    111  - return CheckError('Site-specific', msg)
     112 + return CheckError("Site-specific", msg)
    112 113   
    113 114   # Detect common restrictions such as provider censorship and bot protection
    114  - for flag, err in common_errors.items():
    115  - if flag in html_text:
    116  - return err
     115 + err = errors.detect(html_text)
     116 + if err:
     117 + return err
    117 118   
    118 119   # Detect common site errors
    119 120   if status_code == 403 and not ignore_403:
    120  - return CheckError('Access denied', '403 status code, use proxy/vpn')
     121 + return CheckError("Access denied", "403 status code, use proxy/vpn")
    121 122   
    122 123   elif status_code >= 500:
    123  - return CheckError(f'Server', f'{status_code} status code')
     124 + return CheckError("Server", f"{status_code} status code")
    124 125   
    125 126   return None
    126 127   
    127 128   
    128  -def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
     129 +def process_site_result(
     130 + response, query_notify, logger, results_info, site: MaigretSite
     131 +):
    129 132   if not response:
    130 133   return results_info
    131 134   
    132 135   fulltags = site.tags
    133 136   
    134 137   # Retrieve other site information again
    135  - username = results_info['username']
    136  - is_parsing_enabled = results_info['parsing_enabled']
     138 + username = results_info["username"]
     139 + is_parsing_enabled = results_info["parsing_enabled"]
    137 140   url = results_info.get("url_user")
    138 141   logger.debug(url)
    139 142   
    skipped 7 lines
    147 150   
    148 151   # TODO: refactor
    149 152   if not response:
    150  - logger.error(f'No response for {site.name}')
     153 + logger.error(f"No response for {site.name}")
    151 154   return results_info
    152 155   
    153 156   html_text, status_code, check_error = response
    skipped 2 lines
    156 159   response_time = None
    157 160   
    158 161   if logger.level == logging.DEBUG:
    159  - with open('debug.txt', 'a') as f:
    160  - status = status_code or 'No response'
    161  - f.write(f'url: {url}\nerror: {check_error}\nr: {status}\n')
     162 + with open("debug.txt", "a") as f:
     163 + status = status_code or "No response"
     164 + f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
    162 165   if html_text:
    163  - f.write(f'code: {status}\nresponse: {str(html_text)}\n')
     166 + f.write(f"code: {status}\nresponse: {str(html_text)}\n")
    164 167   
    165 168   # additional check for errors
    166 169   if status_code and not check_error:
    167  - check_error = detect_error_page(html_text, status_code, site.errors, site.ignore403)
     170 + check_error = detect_error_page(
     171 + html_text, status_code, site.errors, site.ignore403
     172 + )
    168 173   
    169 174   if site.activation and html_text:
    170  - is_need_activation = any([s for s in site.activation['marks'] if s in html_text])
     175 + is_need_activation = any(
     176 + [s for s in site.activation["marks"] if s in html_text]
     177 + )
    171 178   if is_need_activation:
    172  - method = site.activation['method']
     179 + method = site.activation["method"]
    173 180   try:
    174 181   activate_fun = getattr(ParsingActivator(), method)
    175 182   # TODO: async call
    176 183   activate_fun(site, logger)
    177 184   except AttributeError:
    178  - logger.warning(f'Activation method {method} for site {site.name} not found!')
     185 + logger.warning(
     186 + f"Activation method {method} for site {site.name} not found!"
     187 + )
    179 188   except Exception as e:
    180  - logger.warning(f'Failed activation {method} for site {site.name}: {e}')
     189 + logger.warning(f"Failed activation {method} for site {site.name}: {e}")
    181 190   
    182 191   site_name = site.pretty_name
    183 192   # presense flags
    skipped 3 lines
    187 196   if html_text:
    188 197   if not presense_flags:
    189 198   is_presense_detected = True
    190  - site.stats['presense_flag'] = None
     199 + site.stats["presense_flag"] = None
    191 200   else:
    192 201   for presense_flag in presense_flags:
    193 202   if presense_flag in html_text:
    194 203   is_presense_detected = True
    195  - site.stats['presense_flag'] = presense_flag
     204 + site.stats["presense_flag"] = presense_flag
    196 205   logger.debug(presense_flag)
    197 206   break
    198 207   
    199 208   if check_error:
    200 209   logger.debug(check_error)
    201  - result = QueryResult(username,
    202  - site_name,
    203  - url,
    204  - QueryStatus.UNKNOWN,
    205  - query_time=response_time,
    206  - error=check_error,
    207  - context=str(CheckError), tags=fulltags)
     210 + result = QueryResult(
     211 + username,
     212 + site_name,
     213 + url,
     214 + QueryStatus.UNKNOWN,
     215 + query_time=response_time,
     216 + error=check_error,
     217 + context=str(CheckError),
     218 + tags=fulltags,
     219 + )
    208 220   elif check_type == "message":
    209 221   absence_flags = site.absence_strs
    210 222   is_absence_flags_list = isinstance(absence_flags, list)
    211  - absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
     223 + absence_flags_set = (
     224 + set(absence_flags) if is_absence_flags_list else {absence_flags}
     225 + )
    212 226   # Checks if the error message is in the HTML
    213  - is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
     227 + is_absence_detected = any(
     228 + [(absence_flag in html_text) for absence_flag in absence_flags_set]
     229 + )
    214 230   if not is_absence_detected and is_presense_detected:
    215  - result = QueryResult(username,
    216  - site_name,
    217  - url,
    218  - QueryStatus.CLAIMED,
    219  - query_time=response_time, tags=fulltags)
     231 + result = QueryResult(
     232 + username,
     233 + site_name,
     234 + url,
     235 + QueryStatus.CLAIMED,
     236 + query_time=response_time,
     237 + tags=fulltags,
     238 + )
    220 239   else:
    221  - result = QueryResult(username,
    222  - site_name,
    223  - url,
    224  - QueryStatus.AVAILABLE,
    225  - query_time=response_time, tags=fulltags)
     240 + result = QueryResult(
     241 + username,
     242 + site_name,
     243 + url,
     244 + QueryStatus.AVAILABLE,
     245 + query_time=response_time,
     246 + tags=fulltags,
     247 + )
    226 248   elif check_type == "status_code":
    227 249   # Checks if the status code of the response is 2XX
    228 250   if (not status_code >= 300 or status_code < 200) and is_presense_detected:
    229  - result = QueryResult(username,
    230  - site_name,
    231  - url,
    232  - QueryStatus.CLAIMED,
    233  - query_time=response_time, tags=fulltags)
     251 + result = QueryResult(
     252 + username,
     253 + site_name,
     254 + url,
     255 + QueryStatus.CLAIMED,
     256 + query_time=response_time,
     257 + tags=fulltags,
     258 + )
    234 259   else:
    235  - result = QueryResult(username,
    236  - site_name,
    237  - url,
    238  - QueryStatus.AVAILABLE,
    239  - query_time=response_time, tags=fulltags)
     260 + result = QueryResult(
     261 + username,
     262 + site_name,
     263 + url,
     264 + QueryStatus.AVAILABLE,
     265 + query_time=response_time,
     266 + tags=fulltags,
     267 + )
    240 268   elif check_type == "response_url":
    241 269   # For this detection method, we have turned off the redirect.
    242 270   # So, there is no need to check the response URL: it will always
    skipped 1 lines
    244 272   # code indicates that the request was successful (i.e. no 404, or
    245 273   # forward to some odd redirect).
    246 274   if 200 <= status_code < 300 and is_presense_detected:
    247  - result = QueryResult(username,
    248  - site_name,
    249  - url,
    250  - QueryStatus.CLAIMED,
    251  - query_time=response_time, tags=fulltags)
     275 + result = QueryResult(
     276 + username,
     277 + site_name,
     278 + url,
     279 + QueryStatus.CLAIMED,
     280 + query_time=response_time,
     281 + tags=fulltags,
     282 + )
    252 283   else:
    253  - result = QueryResult(username,
    254  - site_name,
    255  - url,
    256  - QueryStatus.AVAILABLE,
    257  - query_time=response_time, tags=fulltags)
     284 + result = QueryResult(
     285 + username,
     286 + site_name,
     287 + url,
     288 + QueryStatus.AVAILABLE,
     289 + query_time=response_time,
     290 + tags=fulltags,
     291 + )
    258 292   else:
    259 293   # It should be impossible to ever get here...
    260  - raise ValueError(f"Unknown check type '{check_type}' for "
    261  - f"site '{site.name}'")
     294 + raise ValueError(
     295 + f"Unknown check type '{check_type}' for " f"site '{site.name}'"
     296 + )
    262 297   
    263 298   extracted_ids_data = {}
    264 299   
    skipped 1 lines
    266 301   try:
    267 302   extracted_ids_data = extract(html_text)
    268 303   except Exception as e:
    269  - logger.warning(f'Error while parsing {site.name}: {e}', exc_info=True)
     304 + logger.warning(f"Error while parsing {site.name}: {e}", exc_info=True)
    270 305   
    271 306   if extracted_ids_data:
    272 307   new_usernames = {}
    273 308   for k, v in extracted_ids_data.items():
    274  - if 'username' in k:
    275  - new_usernames[v] = 'username'
     309 + if "username" in k:
     310 + new_usernames[v] = "username"
    276 311   if k in supported_recursive_search_ids:
    277 312   new_usernames[v] = k
    278 313   
    279  - results_info['ids_usernames'] = new_usernames
    280  - results_info['ids_links'] = eval(extracted_ids_data.get('links', '[]'))
     314 + results_info["ids_usernames"] = new_usernames
     315 + results_info["ids_links"] = eval(extracted_ids_data.get("links", "[]"))
    281 316   result.ids_data = extracted_ids_data
    282 317   
    283 318   # Notify caller about results of query.
    284 319   query_notify.update(result, site.similar_search)
    285 320   
    286 321   # Save status of request
    287  - results_info['status'] = result
     322 + results_info["status"] = result
    288 323   
    289 324   # Save results from request
    290  - results_info['http_status'] = status_code
    291  - results_info['is_similar'] = site.similar_search
     325 + results_info["http_status"] = status_code
     326 + results_info["is_similar"] = site.similar_search
    292 327   # results_site['response_text'] = html_text
    293  - results_info['rank'] = site.alexa_rank
     328 + results_info["rank"] = site.alexa_rank
    294 329   return results_info
    295 330   
    296 331   
    297  -async def maigret(username, site_dict, logger, query_notify=None,
    298  - proxy=None, timeout=None, is_parsing_enabled=False,
    299  - id_type='username', debug=False, forced=False,
    300  - max_connections=100, no_progressbar=False,
    301  - cookies=None):
     332 +async def maigret(
     333 + username,
     334 + site_dict,
     335 + logger,
     336 + query_notify=None,
     337 + proxy=None,
     338 + timeout=None,
     339 + is_parsing_enabled=False,
     340 + id_type="username",
     341 + debug=False,
     342 + forced=False,
     343 + max_connections=100,
     344 + no_progressbar=False,
     345 + cookies=None,
     346 +):
    302 347   """Main search func
    303 348   
    304 349   Checks for existence of username on certain sites.
    skipped 37 lines
    342 387   query_notify.start(username, id_type)
    343 388   
    344 389   # TODO: connector
    345  - connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
     390 + connector = (
     391 + ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
     392 + )
    346 393   # connector = aiohttp.TCPConnector(ssl=False)
    347 394   connector.verify_ssl = False
    348 395   
    349 396   cookie_jar = None
    350 397   if cookies:
    351  - logger.debug(f'Using cookies jar file {cookies}')
     398 + logger.debug(f"Using cookies jar file {cookies}")
    352 399   cookie_jar = await import_aiohttp_cookies(cookies)
    353 400   
    354  - session = aiohttp.ClientSession(connector=connector, trust_env=True, cookie_jar=cookie_jar)
     401 + session = aiohttp.ClientSession(
     402 + connector=connector, trust_env=True, cookie_jar=cookie_jar
     403 + )
    355 404   
    356 405   if logger.level == logging.DEBUG:
    357  - future = session.get(url='https://icanhazip.com')
     406 + future = session.get(url="https://icanhazip.com")
    358 407   ip, status, check_error = await get_response(future, None, logger)
    359 408   if ip:
    360  - logger.debug(f'My IP is: {ip.strip()}')
     409 + logger.debug(f"My IP is: {ip.strip()}")
    361 410   else:
    362  - logger.debug(f'IP requesting {check_error[0]}: {check_error[1]}')
     411 + logger.debug(f"IP requesting {check_error[0]}: {check_error[1]}")
    363 412   
    364 413   # Results from analysis of all sites
    365 414   results_total = {}
    skipped 5 lines
    371 420   continue
    372 421   
    373 422   if site.disabled and not forced:
    374  - logger.debug(f'Site {site.name} is disabled, skipping...')
     423 + logger.debug(f"Site {site.name} is disabled, skipping...")
    375 424   continue
    376 425   
    377 426   # Results from analysis of this specific site
    378 427   results_site = {}
    379 428   
    380 429   # Record URL of main site and username
    381  - results_site['username'] = username
    382  - results_site['parsing_enabled'] = is_parsing_enabled
    383  - results_site['url_main'] = site.url_main
    384  - results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
     430 + results_site["username"] = username
     431 + results_site["parsing_enabled"] = is_parsing_enabled
     432 + results_site["url_main"] = site.url_main
     433 + results_site["cookies"] = (
     434 + cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
     435 + )
    385 436   
    386 437   headers = {
    387  - 'User-Agent': get_random_user_agent(),
     438 + "User-Agent": get_random_user_agent(),
    388 439   }
    389 440   
    390 441   headers.update(site.headers)
    391 442   
    392  - if 'url' not in site.__dict__:
    393  - logger.error('No URL for site %s', site.name)
     443 + if "url" not in site.__dict__:
     444 + logger.error("No URL for site %s", site.name)
    394 445   # URL of user on site (if it exists)
    395 446   url = site.url.format(
    396  - urlMain=site.url_main,
    397  - urlSubpath=site.url_subpath,
    398  - username=username
     447 + urlMain=site.url_main, urlSubpath=site.url_subpath, username=username
    399 448   )
    400 449   # workaround to prevent slash errors
    401  - url = re.sub('(?<!:)/+', '/', url)
     450 + url = re.sub("(?<!:)/+", "/", url)
    402 451   
    403 452   # Don't make request if username is invalid for the site
    404 453   if site.regex_check and re.search(site.regex_check, username) is None:
    405 454   # No need to do the check at the site: this user name is not allowed.
    406  - results_site['status'] = QueryResult(username,
    407  - site_name,
    408  - url,
    409  - QueryStatus.ILLEGAL)
     455 + results_site["status"] = QueryResult(
     456 + username, site_name, url, QueryStatus.ILLEGAL
     457 + )
    410 458   results_site["url_user"] = ""
    411  - results_site['http_status'] = ""
    412  - results_site['response_text'] = ""
    413  - query_notify.update(results_site['status'])
     459 + results_site["http_status"] = ""
     460 + results_site["response_text"] = ""
     461 + query_notify.update(results_site["status"])
    414 462   else:
    415 463   # URL of user on site (if it exists)
    416 464   results_site["url_user"] = url
    skipped 11 lines
    428 476   )
    429 477   
    430 478   for k, v in site.get_params.items():
    431  - url_probe += f'&{k}={v}'
     479 + url_probe += f"&{k}={v}"
    432 480   
    433  - if site.check_type == 'status_code' and site.request_head_only:
     481 + if site.check_type == "status_code" and site.request_head_only:
    434 482   # In most cases when we are detecting by status code,
    435 483   # it is not necessary to get the entire body: we can
    436 484   # detect fine with just the HEAD response.
    skipped 14 lines
    451 499   # The final result of the request will be what is available.
    452 500   allow_redirects = True
    453 501   
    454  - future = request_method(url=url_probe, headers=headers,
    455  - allow_redirects=allow_redirects,
    456  - timeout=timeout,
    457  - )
     502 + future = request_method(
     503 + url=url_probe,
     504 + headers=headers,
     505 + allow_redirects=allow_redirects,
     506 + timeout=timeout,
     507 + )
    458 508   
    459 509   # Store future in data for access later
    460 510   # TODO: move to separate obj
    skipped 4 lines
    465 515   
    466 516   coroutines = []
    467 517   for sitename, result_obj in results_total.items():
    468  - coroutines.append((update_site_dict_from_response, [sitename, site_dict, result_obj, logger, query_notify], {}))
     518 + coroutines.append(
     519 + (
     520 + update_site_dict_from_response,
     521 + [sitename, site_dict, result_obj, logger, query_notify],
     522 + {},
     523 + )
     524 + )
    469 525   
    470 526   if no_progressbar:
    471 527   executor = AsyncioSimpleExecutor(logger=logger)
    472 528   else:
    473  - executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=max_connections, timeout=timeout+0.5)
     529 + executor = AsyncioProgressbarQueueExecutor(
     530 + logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
     531 + )
    474 532   
    475 533   results = await executor.run(coroutines)
    476 534   
    477 535   await session.close()
    478 536   
    479  - # TODO: move to separate function
    480  - errors = {}
    481  - for el in results:
    482  - if not el:
    483  - continue
    484  - _, r = el
    485  - if r and isinstance(r, dict) and r.get('status'):
    486  - if not isinstance(r['status'], QueryResult):
    487  - continue
    488  - 
    489  - err = r['status'].error
    490  - if not err:
    491  - continue
    492  - errors[err.type] = errors.get(err.type, 0) + 1
    493  - 
    494  - for err, count in sorted(errors.items(), key=lambda x: x[1], reverse=True):
    495  - logger.warning(f'Errors of type "{err}": {count}')
    496  - 
    497 537   # Notify caller that all queries are finished.
    498 538   query_notify.finish()
    499 539   
    skipped 37 lines
    537 577   
    538 578  async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
    539 579   changes = {
    540  - 'disabled': False,
     580 + "disabled": False,
    541 581   }
    542 582   
    543 583   try:
    skipped 6 lines
    550 590   logger.error(site.__dict__)
    551 591   check_data = []
    552 592   
    553  - logger.info(f'Checking {site.name}...')
     593 + logger.info(f"Checking {site.name}...")
    554 594   
    555 595   for username, status in check_data:
    556 596   async with semaphore:
    skipped 11 lines
    568 608   # TODO: make normal checking
    569 609   if site.name not in results_dict:
    570 610   logger.info(results_dict)
    571  - changes['disabled'] = True
     611 + changes["disabled"] = True
    572 612   continue
    573 613   
    574  - result = results_dict[site.name]['status']
     614 + result = results_dict[site.name]["status"]
    575 615   
    576 616   site_status = result.status
    577 617   
    skipped 2 lines
    580 620   msgs = site.absence_strs
    581 621   etype = site.check_type
    582 622   logger.warning(
    583  - f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
     623 + f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}"
     624 + )
    584 625   # don't disable in case of available username
    585 626   if status == QueryStatus.CLAIMED:
    586  - changes['disabled'] = True
     627 + changes["disabled"] = True
    587 628   elif status == QueryStatus.CLAIMED:
    588  - logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
     629 + logger.warning(
     630 + f"Not found `{username}` in {site.name}, must be claimed"
     631 + )
    589 632   logger.info(results_dict[site.name])
    590  - changes['disabled'] = True
     633 + changes["disabled"] = True
    591 634   else:
    592  - logger.warning(f'Found `{username}` in {site.name}, must be available')
     635 + logger.warning(f"Found `{username}` in {site.name}, must be available")
    593 636   logger.info(results_dict[site.name])
    594  - changes['disabled'] = True
     637 + changes["disabled"] = True
    595 638   
    596  - logger.info(f'Site {site.name} checking is finished')
     639 + logger.info(f"Site {site.name} checking is finished")
    597 640   
    598  - if changes['disabled'] != site.disabled:
    599  - site.disabled = changes['disabled']
     641 + if changes["disabled"] != site.disabled:
     642 + site.disabled = changes["disabled"]
    600 643   db.update_site(site)
    601 644   if not silent:
    602  - action = 'Disabled' if site.disabled else 'Enabled'
    603  - print(f'{action} site {site.name}...')
     645 + action = "Disabled" if site.disabled else "Enabled"
     646 + print(f"{action} site {site.name}...")
    604 647   
    605 648   return changes
    606 649   
    607 650   
    608  -async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
    609  - max_connections=10) -> bool:
     651 +async def self_check(
     652 + db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10
     653 +) -> bool:
    610 654   sem = asyncio.Semaphore(max_connections)
    611 655   tasks = []
    612 656   all_sites = site_data
    skipped 15 lines
    628 672   total_disabled = disabled_new_count - disabled_old_count
    629 673   
    630 674   if total_disabled >= 0:
    631  - message = 'Disabled'
     675 + message = "Disabled"
    632 676   else:
    633  - message = 'Enabled'
     677 + message = "Enabled"
    634 678   total_disabled *= -1
    635 679   
    636 680   if not silent:
    637 681   print(
    638  - f'{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. Run with `--info` flag to get more information')
     682 + f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
     683 + "Run with `--info` flag to get more information"
     684 + )
    639 685   
    640 686   return total_disabled != 0
    641 687   
  • maigret/errors.py
     1 +from typing import Dict, List, Any
     2 + 
     3 +from .result import QueryResult
     4 + 
     5 + 
     6 +# error got as a result of completed search query
     7 +class CheckError:
     8 + _type = 'Unknown'
     9 + _desc = ''
     10 + 
     11 + def __init__(self, typename, desc=''):
     12 + self._type = typename
     13 + self._desc = desc
     14 + 
     15 + def __str__(self):
     16 + if not self._desc:
     17 + return f'{self._type} error'
     18 + 
     19 + return f'{self._type} error: {self._desc}'
     20 + 
     21 + @property
     22 + def type(self):
     23 + return self._type
     24 + 
     25 + @property
     26 + def desc(self):
     27 + return self._desc
     28 + 
     29 + 
     30 +COMMON_ERRORS = {
     31 + '<title>Attention Required! | Cloudflare</title>': CheckError(
     32 + 'Captcha', 'Cloudflare'
     33 + ),
     34 + 'Please stand by, while we are checking your browser': CheckError(
     35 + 'Bot protection', 'Cloudflare'
     36 + ),
     37 + '<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
     38 + 'document.getElementById(\'validate_form_submit\').disabled=true': CheckError(
     39 + 'Captcha', 'Mail.ru'
     40 + ),
     41 + 'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': CheckError(
     42 + 'Bot protection', 'Blazingfast'
     43 + ),
     44 + '404</h1><p class="error-card__description">Мы&nbsp;не&nbsp;нашли страницу': CheckError(
     45 + 'Resolving', 'MegaFon 404 page'
     46 + ),
     47 + 'Доступ к информационному ресурсу ограничен на основании Федерального закона': CheckError(
     48 + 'Censorship', 'MGTS'
     49 + ),
     50 + 'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
     51 +}
     52 + 
     53 +ERRORS_TYPES = {
     54 + 'Captcha': 'Try to switch to another IP address or to use service cookies',
     55 + 'Bot protection': 'Try to switch to another IP address',
     56 + 'Censorship': 'switch to another internet service provider',
     57 + 'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
     58 +}
     59 + 
     60 +THRESHOLD = 3 # percent
     61 + 
     62 + 
     63 +def is_important(err_data):
     64 + return err_data['perc'] >= THRESHOLD
     65 + 
     66 + 
     67 +def is_not_permanent(err_data):
     68 + return True
     69 + 
     70 + 
     71 +def detect(text):
     72 + for flag, err in COMMON_ERRORS.items():
     73 + if flag in text:
     74 + return err
     75 + return None
     76 + 
     77 + 
     78 +def solution_of(err_type) -> str:
     79 + return ERRORS_TYPES.get(err_type, '')
     80 + 
     81 + 
     82 +def extract_and_group(search_res: dict) -> List[Dict[str, Any]]:
     83 + errors_counts: Dict[str, int] = {}
     84 + for r in search_res:
     85 + if r and isinstance(r, dict) and r.get('status'):
     86 + if not isinstance(r['status'], QueryResult):
     87 + continue
     88 + 
     89 + err = r['status'].error
     90 + if not err:
     91 + continue
     92 + errors_counts[err.type] = errors_counts.get(err.type, 0) + 1
     93 + 
     94 + counts = []
     95 + for err, count in sorted(errors_counts.items(), key=lambda x: x[1], reverse=True):
     96 + counts.append(
     97 + {
     98 + 'err': err,
     99 + 'count': count,
     100 + 'perc': round(count / len(search_res), 2) * 100,
     101 + }
     102 + )
     103 + 
     104 + return counts
     105 + 
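    The new maigret/errors.py module centralizes the known error signatures (COMMON_ERRORS), detects them in page text, and groups check errors across a finished search. A minimal usage sketch, assuming hypothetical usernames, site names, and URLs (QueryResult and QueryStatus come from maigret.result, matching the import above):

        from maigret import errors
        from maigret.errors import CheckError
        from maigret.result import QueryResult, QueryStatus

        # signature-based detection of a known error page
        err = errors.detect('<title>Attention Required! | Cloudflare</title>')
        print(err)  # Captcha error: Cloudflare

        # two fake per-site results: one blocked by a captcha, one claimed profile
        search_results = {
            'SiteA': {'status': QueryResult('user', 'SiteA', 'https://a.example/user',
                                            QueryStatus.UNKNOWN,
                                            error=CheckError('Captcha', 'Cloudflare'))},
            'SiteB': {'status': QueryResult('user', 'SiteB', 'https://b.example/user',
                                            QueryStatus.CLAIMED)},
        }

        for err_data in errors.extract_and_group(search_results.values()):
            if errors.is_important(err_data):  # error share >= THRESHOLD (3 percent)
                print(err_data['err'], err_data['count'], errors.solution_of(err_data['err']))

    With half of the checks failing on a captcha, the loop prints the error type, its count, and the suggested workaround from ERRORS_TYPES.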
  • maigret/executors.py
    skipped 1 lines
    2 2  import time
    3 3  import tqdm
    4 4  import sys
    5  -from typing import Iterable
     5 +from typing import Iterable, Any, List
    6 6   
    7 7  from .types import QueryDraft
    8 8   
    skipped 91 lines
    100 100   self.queue.task_done()
    101 101   
    102 102   async def _run(self, queries: Iterable[QueryDraft]):
    103  - self.results = []
     103 + self.results: List[Any] = []
    104 104   
    105 105   queries_list = list(queries)
    106 106   
    107 107   min_workers = min(len(queries_list), self.workers_count)
    108 108   
    109  - workers = [create_task_func()(self.worker())
    110  - for _ in range(min_workers)]
     109 + workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
    111 110   
    112 111   self.progress = self.progress_func(total=len(queries_list))
    113 112   for t in queries_list:
    skipped 7 lines
  • maigret/maigret.py
    skipped 11 lines
    12 12  import requests
    13 13  from socid_extractor import extract, parse, __version__ as socid_version
    14 14   
    15  -from .checking import timeout_check, supported_recursive_search_ids, self_check, unsupported_characters, maigret
     15 +from .checking import (
     16 + timeout_check,
     17 + supported_recursive_search_ids,
     18 + self_check,
     19 + unsupported_characters,
     20 + maigret,
     21 +)
     22 +from . import errors
    16 23  from .notify import QueryNotifyPrint
    17  -from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
    18  - generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, check_supported_json_format, \
    19  - save_json_report
     24 +from .report import (
     25 + save_csv_report,
     26 + save_xmind_report,
     27 + save_html_report,
     28 + save_pdf_report,
     29 + generate_report_context,
     30 + save_txt_report,
     31 + SUPPORTED_JSON_REPORT_FORMATS,
     32 + check_supported_json_format,
     33 + save_json_report,
     34 +)
    20 35  from .sites import MaigretDatabase
    21 36  from .submit import submit_dialog
    22 37  from .utils import get_dict_ascii_tree
    skipped 1 lines
    24 39  __version__ = '0.1.19'
    25 40   
    26 41   
     42 +def notify_about_errors(search_results, query_notify):
     43 + errs = errors.extract_and_group(search_results.values())
     44 + was_errs_displayed = False
     45 + for e in errs:
     46 + if not errors.is_important(e):
     47 + continue
     48 + text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
     49 + solution = errors.solution_of(e['err'])
     50 + if solution:
     51 + text = '. '.join([text, solution])
     52 + 
     53 + query_notify.warning(text, '!')
     54 + was_errs_displayed = True
     55 + 
     56 + if was_errs_displayed:
     57 + query_notify.warning(
     58 + 'You can see detailed site check errors with a flag `--print-errors`'
     59 + )
     60 + 
     61 + 
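     # A hypothetical call site for the helper above (the actual call is outside the
     # hunks shown in this diff): after a search produces the per-site results dict
     # (site name -> results_info), frequent error types are grouped and reported
     # through the active notifier, e.g.
     #
     #     notify_about_errors(search_results, query_notify)
     #
     # where `search_results` is the per-site results dict and `query_notify` is the
     # QueryNotifyPrint instance created in main().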
    27 62  async def main():
    28  - version_string = '\n'.join([
    29  - f'%(prog)s {__version__}',
    30  - f'Socid-extractor: {socid_version}',
    31  - f'Aiohttp: {aiohttp.__version__}',
    32  - f'Requests: {requests.__version__}',
    33  - f'Python: {platform.python_version()}',
    34  - ])
     63 + version_string = '\n'.join(
     64 + [
     65 + f'%(prog)s {__version__}',
     66 + f'Socid-extractor: {socid_version}',
     67 + f'Aiohttp: {aiohttp.__version__}',
     68 + f'Requests: {requests.__version__}',
     69 + f'Python: {platform.python_version()}',
     70 + ]
     71 + )
    35 72   
    36  - parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
    37  - description=f"Maigret v{__version__}"
    38  - )
    39  - parser.add_argument("--version",
    40  - action="version", version=version_string,
    41  - help="Display version information and dependencies."
    42  - )
    43  - parser.add_argument("--info", "-vv",
    44  - action="store_true", dest="info", default=False,
    45  - help="Display service information."
    46  - )
    47  - parser.add_argument("--verbose", "-v",
    48  - action="store_true", dest="verbose", default=False,
    49  - help="Display extra information and metrics."
    50  - )
    51  - parser.add_argument("-d", "--debug", "-vvv",
    52  - action="store_true", dest="debug", default=False,
    53  - help="Saving debugging information and sites responses in debug.txt."
    54  - )
    55  - parser.add_argument("--site",
    56  - action="append", metavar='SITE_NAME',
    57  - dest="site_list", default=[],
    58  - help="Limit analysis to just the listed sites (use several times to specify more than one)"
    59  - )
    60  - parser.add_argument("--proxy", "-p", metavar='PROXY_URL',
    61  - action="store", dest="proxy", default=None,
    62  - help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
    63  - )
    64  - parser.add_argument("--db", metavar="DB_FILE",
    65  - dest="db_file", default=None,
    66  - help="Load Maigret database from a JSON file or an online, valid, JSON file.")
    67  - parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE",
    68  - dest="cookie_file", default=None,
    69  - help="File with cookies.")
    70  - parser.add_argument("--timeout",
    71  - action="store", metavar='TIMEOUT',
    72  - dest="timeout", type=timeout_check, default=10,
    73  - help="Time (in seconds) to wait for response to requests."
    74  - "Default timeout of 10.0s. "
    75  - "A longer timeout will be more likely to get results from slow sites."
    76  - "On the other hand, this may cause a long delay to gather all results."
    77  - )
    78  - parser.add_argument("-n", "--max-connections",
    79  - action="store", type=int,
    80  - dest="connections", default=100,
    81  - help="Allowed number of concurrent connections."
    82  - )
    83  - parser.add_argument("-a", "--all-sites",
    84  - action="store_true", dest="all_sites", default=False,
    85  - help="Use all sites for scan."
    86  - )
    87  - parser.add_argument("--top-sites",
    88  - action="store", default=500, type=int,
    89  - help="Count of sites for scan ranked by Alexa Top (default: 500)."
    90  - )
    91  - parser.add_argument("--print-not-found",
    92  - action="store_true", dest="print_not_found", default=False,
    93  - help="Print sites where the username was not found."
    94  - )
    95  - parser.add_argument("--print-errors",
    96  - action="store_true", dest="print_check_errors", default=False,
    97  - help="Print errors messages: connection, captcha, site country ban, etc."
    98  - )
    99  - parser.add_argument("--submit", metavar='EXISTING_USER_URL',
    100  - type=str, dest="new_site_to_submit", default=False,
    101  - help="URL of existing profile in new site to submit."
    102  - )
    103  - parser.add_argument("--no-color",
    104  - action="store_true", dest="no_color", default=False,
    105  - help="Don't color terminal output"
    106  - )
    107  - parser.add_argument("--no-progressbar",
    108  - action="store_true", dest="no_progressbar", default=False,
    109  - help="Don't show progressbar."
    110  - )
    111  - parser.add_argument("--browse", "-b",
    112  - action="store_true", dest="browse", default=False,
    113  - help="Browse to all results on default bowser."
    114  - )
    115  - parser.add_argument("--no-recursion",
    116  - action="store_true", dest="disable_recursive_search", default=False,
    117  - help="Disable recursive search by additional data extracted from pages."
    118  - )
    119  - parser.add_argument("--no-extracting",
    120  - action="store_true", dest="disable_extracting", default=False,
    121  - help="Disable parsing pages for additional data and other usernames."
    122  - )
    123  - parser.add_argument("--self-check",
    124  - action="store_true", default=False,
    125  - help="Do self check for sites and database and disable non-working ones."
    126  - )
    127  - parser.add_argument("--stats",
    128  - action="store_true", default=False,
    129  - help="Show database statistics."
    130  - )
    131  - parser.add_argument("--use-disabled-sites",
    132  - action="store_true", default=False,
    133  - help="Use disabled sites to search (may cause many false positives)."
    134  - )
    135  - parser.add_argument("--parse",
    136  - dest="parse_url", default='',
    137  - help="Parse page by URL and extract username and IDs to use for search."
    138  - )
    139  - parser.add_argument("--id-type",
    140  - dest="id_type", default='username',
    141  - help="Specify identifier(s) type (default: username)."
    142  - )
    143  - parser.add_argument("--ignore-ids",
    144  - action="append", metavar='IGNORED_IDS',
    145  - dest="ignore_ids_list", default=[],
    146  - help="Do not make search by the specified username or other ids."
    147  - )
    148  - parser.add_argument("username",
    149  - nargs='+', metavar='USERNAMES',
    150  - action="store",
    151  - help="One or more usernames to check with social networks."
    152  - )
    153  - parser.add_argument("--tags",
    154  - dest="tags", default='',
    155  - help="Specify tags of sites."
    156  - )
     73 + parser = ArgumentParser(
     74 + formatter_class=RawDescriptionHelpFormatter,
     75 + description=f"Maigret v{__version__}",
     76 + )
     77 + parser.add_argument(
     78 + "--version",
     79 + action="version",
     80 + version=version_string,
     81 + help="Display version information and dependencies.",
     82 + )
     83 + parser.add_argument(
     84 + "--info",
     85 + "-vv",
     86 + action="store_true",
     87 + dest="info",
     88 + default=False,
     89 + help="Display service information.",
     90 + )
     91 + parser.add_argument(
     92 + "--verbose",
     93 + "-v",
     94 + action="store_true",
     95 + dest="verbose",
     96 + default=False,
     97 + help="Display extra information and metrics.",
     98 + )
     99 + parser.add_argument(
     100 + "-d",
     101 + "--debug",
     102 + "-vvv",
     103 + action="store_true",
     104 + dest="debug",
     105 + default=False,
     106 + help="Saving debugging information and sites responses in debug.txt.",
     107 + )
     108 + parser.add_argument(
     109 + "--site",
     110 + action="append",
     111 + metavar='SITE_NAME',
     112 + dest="site_list",
     113 + default=[],
     114 + help="Limit analysis to just the listed sites (use several times to specify more than one)",
     115 + )
     116 + parser.add_argument(
     117 + "--proxy",
     118 + "-p",
     119 + metavar='PROXY_URL',
     120 + action="store",
     121 + dest="proxy",
     122 + default=None,
     123 + help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
     124 + )
     125 + parser.add_argument(
     126 + "--db",
     127 + metavar="DB_FILE",
     128 + dest="db_file",
     129 + default=None,
     130 + help="Load Maigret database from a JSON file or an online, valid, JSON file.",
     131 + )
     132 + parser.add_argument(
     133 + "--cookies-jar-file",
     134 + metavar="COOKIE_FILE",
     135 + dest="cookie_file",
     136 + default=None,
     137 + help="File with cookies.",
     138 + )
     139 + parser.add_argument(
     140 + "--timeout",
     141 + action="store",
     142 + metavar='TIMEOUT',
     143 + dest="timeout",
     144 + type=timeout_check,
     145 + default=30,
     146 + help="Time (in seconds) to wait for response to requests. "
     147 + "Default timeout of 30.0s. "
     148 + "A longer timeout will be more likely to get results from slow sites. "
     149 + "On the other hand, this may cause a long delay to gather all results. ",
     150 + )
     151 + parser.add_argument(
     152 + "-n",
     153 + "--max-connections",
     154 + action="store",
     155 + type=int,
     156 + dest="connections",
     157 + default=100,
     158 + help="Allowed number of concurrent connections.",
     159 + )
     160 + parser.add_argument(
     161 + "-a",
     162 + "--all-sites",
     163 + action="store_true",
     164 + dest="all_sites",
     165 + default=False,
     166 + help="Use all sites for scan.",
     167 + )
     168 + parser.add_argument(
     169 + "--top-sites",
     170 + action="store",
     171 + default=500,
     172 + type=int,
     173 + help="Count of sites for scan ranked by Alexa Top (default: 500).",
     174 + )
     175 + parser.add_argument(
     176 + "--print-not-found",
     177 + action="store_true",
     178 + dest="print_not_found",
     179 + default=False,
     180 + help="Print sites where the username was not found.",
     181 + )
     182 + parser.add_argument(
     183 + "--print-errors",
     184 + action="store_true",
     185 + dest="print_check_errors",
     186 + default=False,
     187 + help="Print errors messages: connection, captcha, site country ban, etc.",
     188 + )
     189 + parser.add_argument(
     190 + "--submit",
     191 + metavar='EXISTING_USER_URL',
     192 + type=str,
     193 + dest="new_site_to_submit",
     194 + default=False,
     195 + help="URL of existing profile in new site to submit.",
     196 + )
     197 + parser.add_argument(
     198 + "--no-color",
     199 + action="store_true",
     200 + dest="no_color",
     201 + default=False,
     202 + help="Don't color terminal output",
     203 + )
     204 + parser.add_argument(
     205 + "--no-progressbar",
     206 + action="store_true",
     207 + dest="no_progressbar",
     208 + default=False,
     209 + help="Don't show progressbar.",
     210 + )
     211 + parser.add_argument(
     212 + "--browse",
     213 + "-b",
     214 + action="store_true",
     215 + dest="browse",
     216 + default=False,
     217 + help="Browse to all results on default bowser.",
     218 + )
     219 + parser.add_argument(
     220 + "--no-recursion",
     221 + action="store_true",
     222 + dest="disable_recursive_search",
     223 + default=False,
     224 + help="Disable recursive search by additional data extracted from pages.",
     225 + )
     226 + parser.add_argument(
     227 + "--no-extracting",
     228 + action="store_true",
     229 + dest="disable_extracting",
     230 + default=False,
     231 + help="Disable parsing pages for additional data and other usernames.",
     232 + )
     233 + parser.add_argument(
     234 + "--self-check",
     235 + action="store_true",
     236 + default=False,
     237 + help="Do self check for sites and database and disable non-working ones.",
     238 + )
     239 + parser.add_argument(
     240 + "--stats", action="store_true", default=False, help="Show database statistics."
     241 + )
     242 + parser.add_argument(
     243 + "--use-disabled-sites",
     244 + action="store_true",
     245 + default=False,
     246 + help="Use disabled sites to search (may cause many false positives).",
     247 + )
     248 + parser.add_argument(
     249 + "--parse",
     250 + dest="parse_url",
     251 + default='',
     252 + help="Parse page by URL and extract username and IDs to use for search.",
     253 + )
     254 + parser.add_argument(
     255 + "--id-type",
     256 + dest="id_type",
     257 + default='username',
     258 + help="Specify identifier(s) type (default: username).",
     259 + )
     260 + parser.add_argument(
     261 + "--ignore-ids",
     262 + action="append",
     263 + metavar='IGNORED_IDS',
     264 + dest="ignore_ids_list",
     265 + default=[],
     266 + help="Do not make search by the specified username or other ids.",
     267 + )
     268 + parser.add_argument(
     269 + "username",
     270 + nargs='+',
     271 + metavar='USERNAMES',
     272 + action="store",
     273 + help="One or more usernames to check with social networks.",
     274 + )
     275 + parser.add_argument(
     276 + "--tags", dest="tags", default='', help="Specify tags of sites."
     277 + )
    157 278   # reports options
    158  - parser.add_argument("--folderoutput", "-fo", dest="folderoutput", default="reports",
    159  - help="If using multiple usernames, the output of the results will be saved to this folder."
    160  - )
    161  - parser.add_argument("-T", "--txt",
    162  - action="store_true", dest="txt", default=False,
    163  - help="Create a TXT report (one report per username)."
    164  - )
    165  - parser.add_argument("-C", "--csv",
    166  - action="store_true", dest="csv", default=False,
    167  - help="Create a CSV report (one report per username)."
    168  - )
    169  - parser.add_argument("-H", "--html",
    170  - action="store_true", dest="html", default=False,
    171  - help="Create an HTML report file (general report on all usernames)."
    172  - )
    173  - parser.add_argument("-X", "--xmind",
    174  - action="store_true",
    175  - dest="xmind", default=False,
    176  - help="Generate an XMind 8 mindmap report (one report per username)."
    177  - )
    178  - parser.add_argument("-P", "--pdf",
    179  - action="store_true",
    180  - dest="pdf", default=False,
    181  - help="Generate a PDF report (general report on all usernames)."
    182  - )
    183  - parser.add_argument("-J", "--json",
    184  - action="store", metavar='REPORT_TYPE',
    185  - dest="json", default='', type=check_supported_json_format,
    186  - help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
    187  - " (one report per username)."
    188  - )
     279 + parser.add_argument(
     280 + "--folderoutput",
     281 + "-fo",
     282 + dest="folderoutput",
     283 + default="reports",
     284 + help="If using multiple usernames, the output of the results will be saved to this folder.",
     285 + )
     286 + parser.add_argument(
     287 + "-T",
     288 + "--txt",
     289 + action="store_true",
     290 + dest="txt",
     291 + default=False,
     292 + help="Create a TXT report (one report per username).",
     293 + )
     294 + parser.add_argument(
     295 + "-C",
     296 + "--csv",
     297 + action="store_true",
     298 + dest="csv",
     299 + default=False,
     300 + help="Create a CSV report (one report per username).",
     301 + )
     302 + parser.add_argument(
     303 + "-H",
     304 + "--html",
     305 + action="store_true",
     306 + dest="html",
     307 + default=False,
     308 + help="Create an HTML report file (general report on all usernames).",
     309 + )
     310 + parser.add_argument(
     311 + "-X",
     312 + "--xmind",
     313 + action="store_true",
     314 + dest="xmind",
     315 + default=False,
     316 + help="Generate an XMind 8 mindmap report (one report per username).",
     317 + )
     318 + parser.add_argument(
     319 + "-P",
     320 + "--pdf",
     321 + action="store_true",
     322 + dest="pdf",
     323 + default=False,
     324 + help="Generate a PDF report (general report on all usernames).",
     325 + )
     326 + parser.add_argument(
     327 + "-J",
     328 + "--json",
     329 + action="store",
     330 + metavar='REPORT_TYPE',
     331 + dest="json",
     332 + default='',
     333 + type=check_supported_json_format,
     334 + help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
     335 + " (one report per username).",
     336 + )
    189 337   
    190 338   args = parser.parse_args()
    191 339   
    skipped 2 lines
    194 342   logging.basicConfig(
    195 343   format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
    196 344   datefmt='%H:%M:%S',
    197  - level=log_level
     345 + level=log_level,
    198 346   )
    199 347   
    200 348   if args.debug:
    skipped 10 lines
    211 359   usernames = {
    212 360   u: args.id_type
    213 361   for u in args.username
    214  - if u not in ['-']
    215  - and u not in args.ignore_ids_list
     362 + if u not in ['-'] and u not in args.ignore_ids_list
    216 363   }
    217 364   
    218 365   parsing_enabled = not args.disable_extracting
    skipped 9 lines
    228 375   try:
    229 376   # temporary workaround for URL mutations MVP
    230 377   from socid_extractor import mutate_url
     378 + 
    231 379   reqs += list(mutate_url(args.parse_url))
    232  - except:
     380 + except Exception as e:
     381 + logger.warning(e)
    233 382   pass
    234 383   
    235 384   for req in reqs:
    skipped 15 lines
    251 400   args.tags = list(set(str(args.tags).split(',')))
    252 401   
    253 402   if args.db_file is None:
    254  - args.db_file = \
    255  - os.path.join(os.path.dirname(os.path.realpath(__file__)),
    256  - "resources/data.json"
    257  - )
     403 + args.db_file = os.path.join(
     404 + os.path.dirname(os.path.realpath(__file__)), "resources/data.json"
     405 + )
    258 406   
    259 407   if args.top_sites == 0 or args.all_sites:
    260 408   args.top_sites = sys.maxsize
    261 409   
    262 410   # Create notify object for query results.
    263  - query_notify = QueryNotifyPrint(result=None,
    264  - verbose=args.verbose,
    265  - print_found_only=not args.print_not_found,
    266  - skip_check_errors=not args.print_check_errors,
    267  - color=not args.no_color)
     411 + query_notify = QueryNotifyPrint(
     412 + result=None,
     413 + verbose=args.verbose,
     414 + print_found_only=not args.print_not_found,
     415 + skip_check_errors=not args.print_check_errors,
     416 + color=not args.no_color,
     417 + )
    268 418   
    269 419   # Create object with all information about sites we are aware of.
    270 420   db = MaigretDatabase().load_from_file(args.db_file)
    271  - get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
    272  - names=args.site_list,
    273  - disabled=False, id_type=x)
     421 + get_top_sites_for_id = lambda x: db.ranked_sites_dict(
     422 + top=args.top_sites,
     423 + tags=args.tags,
     424 + names=args.site_list,
     425 + disabled=False,
     426 + id_type=x,
     427 + )
    274 428   
    275 429   site_data = get_top_sites_for_id(args.id_type)
    276 430   
    277 431   if args.new_site_to_submit:
    278  - is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file, logger)
     432 + is_submitted = await submit_dialog(
     433 + db, args.new_site_to_submit, args.cookie_file, logger
     434 + )
    279 435   if is_submitted:
    280 436   db.save_to_file(args.db_file)
    281 437   
    282 438   # Database self-checking
    283 439   if args.self_check:
    284 440   print('Maigret sites database self-checking...')
    285  - is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
     441 + is_need_update = await self_check(
     442 + db, site_data, logger, max_connections=args.connections
     443 + )
    286 444   if is_need_update:
    287 445   if input('Do you want to save changes permanently? [Yn]\n').lower() == 'y':
    288 446   db.save_to_file(args.db_file)
    skipped 25 lines
    314 472   query_notify.warning('No sites to check, exiting!')
    315 473   sys.exit(2)
    316 474   else:
    317  - query_notify.warning(f'Starting a search on top {len(site_data)} sites from the Maigret database...')
     475 + query_notify.warning(
     476 + f'Starting a search on the top {len(site_data)} sites from the Maigret database...'
     477 + )
    318 478   if not args.all_sites:
    319  - query_notify.warning(f'You can run search by full list of sites with flag `-a`', '!')
     479 + query_notify.warning(
     480 + 'You can run a search on the full list of sites with the `-a` flag', '!'
     481 + )
    320 482   
    321 483   already_checked = set()
    322 484   general_results = []
    skipped 8 lines
    331 493   already_checked.add(username.lower())
    332 494   
    333 495   if username in args.ignore_ids_list:
    334  - query_notify.warning(f'Skip a search by username {username} cause it\'s marked as ignored.')
     496 + query_notify.warning(
     497 + f'Skipping the search by username {username} because it is marked as ignored.'
     498 + )
    335 499   continue
    336 500   
    337 501   # check for characters not supported by sites in general
    338  - found_unsupported_chars = set(unsupported_characters).intersection(set(username))
     502 + found_unsupported_chars = set(unsupported_characters).intersection(
     503 + set(username)
     504 + )
    339 505   
    340 506   if found_unsupported_chars:
    341  - pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
     507 + pretty_chars_str = ','.join(
     508 + map(lambda s: f'"{s}"', found_unsupported_chars)
     509 + )
    342 510   query_notify.warning(
    343  - f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
     511 + f'Found unsupported URL characters: {pretty_chars_str}, skipping the search by username "{username}"'
     512 + )
    344 513   continue
    345 514   
    346 515   sites_to_check = get_top_sites_for_id(id_type)
    347 516   
    348  - results = await maigret(username=username,
    349  - site_dict=dict(sites_to_check),
    350  - query_notify=query_notify,
    351  - proxy=args.proxy,
    352  - timeout=args.timeout,
    353  - is_parsing_enabled=parsing_enabled,
    354  - id_type=id_type,
    355  - debug=args.verbose,
    356  - logger=logger,
    357  - cookies=args.cookie_file,
    358  - forced=args.use_disabled_sites,
    359  - max_connections=args.connections,
    360  - no_progressbar=args.no_progressbar,
    361  - )
     517 + results = await maigret(
     518 + username=username,
     519 + site_dict=dict(sites_to_check),
     520 + query_notify=query_notify,
     521 + proxy=args.proxy,
     522 + timeout=args.timeout,
     523 + is_parsing_enabled=parsing_enabled,
     524 + id_type=id_type,
     525 + debug=args.verbose,
     526 + logger=logger,
     527 + cookies=args.cookie_file,
     528 + forced=args.use_disabled_sites,
     529 + max_connections=args.connections,
     530 + no_progressbar=args.no_progressbar,
     531 + )
     532 + 
     533 + notify_about_errors(results, query_notify)
    362 534   
    363 535   general_results.append((username, id_type, results))
    364 536   
    skipped 32 lines
    397 569   query_notify.warning(f'TXT report for {username} saved in {filename}')
    398 570   
    399 571   if args.json:
    400  - filename = report_filepath_tpl.format(username=username, postfix=f'_{args.json}.json')
     572 + filename = report_filepath_tpl.format(
     573 + username=username, postfix=f'_{args.json}.json'
     574 + )
    401 575   save_json_report(filename, username, results, report_type=args.json)
    402  - query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
     576 + query_notify.warning(
     577 + f'JSON {args.json} report for {username} saved in {filename}'
     578 + )
    403 579   
    404 580   # reporting for all the result
    405 581   if general_results:
    skipped 31 lines
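    Note on the new notify_about_errors(results, query_notify) call above: its implementation is not part of this hunk, so the snippet below is only a minimal sketch of the idea named in the commit message ("notifications about frequent search errors"). The helper name notify_about_errors_sketch, the 0.3 threshold, and the warning wording are assumptions for illustration, not the actual code.

    from collections import Counter

    def notify_about_errors_sketch(results: dict, query_notify, threshold: float = 0.3):
        # Count check errors by error text across all site results.
        errors = Counter()
        for site_result in results.values():
            status = site_result.get("status")
            if status is not None and status.error:
                errors[str(status.error)] += 1
        total = len(results) or 1
        # Warn the user when a single error dominates the run
        # (typical causes: broken proxy, censorship, rate limiting).
        for err_text, count in errors.most_common(1):
            if count / total >= threshold:
                query_notify.warning(
                    f"Too many errors of the same type ({count}/{total} sites): {err_text}",
                    "!",
                )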
  • ■ ■ ■ ■ ■ ■
    maigret/notify.py
    skipped 10 lines
    11 11  from .utils import get_dict_ascii_tree
    12 12   
    13 13   
    14  -class QueryNotify():
     14 +class QueryNotify:
    15 15   """Query Notify Object.
    16 16   
    17 17   Base class that describes methods available to notify the results of
    skipped 21 lines
    39 39   
    40 40   return
    41 41   
    42  - def start(self, message=None, id_type='username'):
     42 + def start(self, message=None, id_type="username"):
    43 43   """Notify Start.
    44 44   
    45 45   Notify method for start of query. This method will be called before
    skipped 70 lines
    116 116   Query notify class that prints results.
    117 117   """
    118 118   
    119  - def __init__(self, result=None, verbose=False, print_found_only=False,
    120  - skip_check_errors=False, color=True):
     119 + def __init__(
     120 + self,
     121 + result=None,
     122 + verbose=False,
     123 + print_found_only=False,
     124 + skip_check_errors=False,
     125 + color=True,
     126 + ):
    121 127   """Create Query Notify Print Object.
    122 128   
    123 129   Contains information about a specific method of notifying the results
    skipped 38 lines
    162 168   
    163 169   title = f"Checking {id_type}"
    164 170   if self.color:
    165  - print(Style.BRIGHT + Fore.GREEN + "[" +
    166  - Fore.YELLOW + "*" +
    167  - Fore.GREEN + f"] {title}" +
    168  - Fore.WHITE + f" {message}" +
    169  - Fore.GREEN + " on:")
     171 + print(
     172 + Style.BRIGHT
     173 + + Fore.GREEN
     174 + + "["
     175 + + Fore.YELLOW
     176 + + "*"
     177 + + Fore.GREEN
     178 + + f"] {title}"
     179 + + Fore.WHITE
     180 + + f" {message}"
     181 + + Fore.GREEN
     182 + + " on:"
     183 + )
    170 184   else:
    171 185   print(f"[*] {title} {message} on:")
    172 186   
    173  - def warning(self, message, symbol='-'):
    174  - msg = f'[{symbol}] {message}'
     187 + def warning(self, message, symbol="-"):
     188 + msg = f"[{symbol}] {message}"
    175 189   if self.color:
    176 190   print(Style.BRIGHT + Fore.YELLOW + msg)
    177 191   else:
    178 192   print(msg)
    179  - 
    180 193   
    181 194   def update(self, result, is_similar=False):
    182 195   """Notify Update.
    skipped 13 lines
    196 209   if not self.result.ids_data:
    197 210   ids_data_text = ""
    198 211   else:
    199  - ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), ' ')
     212 + ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ")
    200 213   
    201  - def make_colored_terminal_notify(status, text, status_color, text_color, appendix):
     214 + def make_colored_terminal_notify(
     215 + status, text, status_color, text_color, appendix
     216 + ):
    202 217   text = [
    203  - f'{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]' +
    204  - f'{text_color} {text}: {Style.RESET_ALL}' +
    205  - f'{appendix}'
     218 + f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]"
     219 + + f"{text_color} {text}: {Style.RESET_ALL}"
     220 + + f"{appendix}"
    206 221   ]
    207  - return ''.join(text)
     222 + return "".join(text)
    208 223   
    209 224   def make_simple_terminal_notify(status, text, appendix):
    210  - return f'[{status}] {text}: {appendix}'
     225 + return f"[{status}] {text}: {appendix}"
    211 226   
    212 227   def make_terminal_notify(is_colored=True, *args):
    213 228   if is_colored:
    skipped 6 lines
    220 235   # Output to the terminal is desired.
    221 236   if result.status == QueryStatus.CLAIMED:
    222 237   color = Fore.BLUE if is_similar else Fore.GREEN
    223  - status = '?' if is_similar else '+'
     238 + status = "?" if is_similar else "+"
    224 239   notify = make_terminal_notify(
    225 240   self.color,
    226  - status, result.site_name,
    227  - color, color,
    228  - result.site_url_user + ids_data_text
     241 + status,
     242 + result.site_name,
     243 + color,
     244 + color,
     245 + result.site_url_user + ids_data_text,
    229 246   )
    230 247   elif result.status == QueryStatus.AVAILABLE:
    231 248   if not self.print_found_only:
    232 249   notify = make_terminal_notify(
    233 250   self.color,
    234  - '-', result.site_name,
    235  - Fore.RED, Fore.YELLOW,
    236  - 'Not found!' + ids_data_text
     251 + "-",
     252 + result.site_name,
     253 + Fore.RED,
     254 + Fore.YELLOW,
     255 + "Not found!" + ids_data_text,
    237 256   )
    238 257   elif result.status == QueryStatus.UNKNOWN:
    239 258   if not self.skip_check_errors:
    240 259   notify = make_terminal_notify(
    241 260   self.color,
    242  - '?', result.site_name,
    243  - Fore.RED, Fore.RED,
    244  - str(self.result.error) + ids_data_text
     261 + "?",
     262 + result.site_name,
     263 + Fore.RED,
     264 + Fore.RED,
     265 + str(self.result.error) + ids_data_text,
    245 266   )
    246 267   elif result.status == QueryStatus.ILLEGAL:
    247 268   if not self.print_found_only:
    248  - text = 'Illegal Username Format For This Site!'
     269 + text = "Illegal Username Format For This Site!"
    249 270   notify = make_terminal_notify(
    250 271   self.color,
    251  - '-', result.site_name,
    252  - Fore.RED, Fore.YELLOW,
    253  - text + ids_data_text
     272 + "-",
     273 + result.site_name,
     274 + Fore.RED,
     275 + Fore.YELLOW,
     276 + text + ids_data_text,
    254 277   )
    255 278   else:
    256 279   # It should be impossible to ever get here...
    257  - raise ValueError(f"Unknown Query Status '{str(result.status)}' for "
    258  - f"site '{self.result.site_name}'")
     280 + raise ValueError(
     281 + f"Unknown Query Status '{str(result.status)}' for "
     282 + f"site '{self.result.site_name}'"
     283 + )
    259 284   
    260 285   if notify:
    261  - sys.stdout.write('\x1b[1K\r')
     286 + sys.stdout.write("\x1b[1K\r")
    262 287   print(notify)
    263 288   
    264 289   return
    skipped 14 lines
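    For reference, a usage sketch of the reformatted QueryNotifyPrint (constructor arguments as shown above; the exact console output depends on the terminal/colorama setup):

    from maigret.notify import QueryNotifyPrint

    notify = QueryNotifyPrint(
        result=None,
        verbose=False,
        print_found_only=True,   # suppress "Not found!" lines in update()
        skip_check_errors=True,  # suppress sites that returned check errors
        color=False,             # plain text markers instead of ANSI colors
    )
    notify.warning("Starting a search...", "!")  # prints: [!] Starting a search...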
  • ■ ■ ■ ■ ■ ■
    maigret/report.py
    skipped 4 lines
    5 5  import os
    6 6  from argparse import ArgumentTypeError
    7 7  from datetime import datetime
     8 +from typing import Dict, Any
    8 9   
    9 10  import pycountry
    10 11  import xmind
    skipped 5 lines
    16 17  from .utils import is_country_tag, CaseConverter, enrich_link_str
    17 18   
    18 19  SUPPORTED_JSON_REPORT_FORMATS = [
    19  - 'simple',
    20  - 'ndjson',
     20 + "simple",
     21 + "ndjson",
    21 22  ]
    22 23   
    23  -'''
     24 +"""
    24 25  UTILS
    25  -'''
     26 +"""
    26 27   
    27 28   
    28 29  def filter_supposed_data(data):
    29  - ### interesting fields
    30  - allowed_fields = ['fullname', 'gender', 'location', 'age']
    31  - filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0]
    32  - for k, v in data.items()
    33  - if k in allowed_fields}
     30 + # interesting fields
     31 + allowed_fields = ["fullname", "gender", "location", "age"]
     32 + filtered_supposed_data = {
     33 + CaseConverter.snake_to_title(k): v[0]
     34 + for k, v in data.items()
     35 + if k in allowed_fields
     36 + }
    34 37   return filtered_supposed_data
    35 38   
    36 39   
    37  -'''
     40 +"""
    38 41  REPORTS SAVING
    39  -'''
     42 +"""
    40 43   
    41 44   
    42 45  def save_csv_report(filename: str, username: str, results: dict):
    43  - with open(filename, 'w', newline='', encoding='utf-8') as f:
     46 + with open(filename, "w", newline="", encoding="utf-8") as f:
    44 47   generate_csv_report(username, results, f)
    45 48   
    46 49   
    47 50  def save_txt_report(filename: str, username: str, results: dict):
    48  - with open(filename, 'w', encoding='utf-8') as f:
     51 + with open(filename, "w", encoding="utf-8") as f:
    49 52   generate_txt_report(username, results, f)
    50 53   
    51 54   
    52 55  def save_html_report(filename: str, context: dict):
    53 56   template, _ = generate_report_template(is_pdf=False)
    54 57   filled_template = template.render(**context)
    55  - with open(filename, 'w') as f:
     58 + with open(filename, "w") as f:
    56 59   f.write(filled_template)
    57 60   
    58 61   
    59 62  def save_pdf_report(filename: str, context: dict):
    60 63   template, css = generate_report_template(is_pdf=True)
    61 64   filled_template = template.render(**context)
    62  - with open(filename, 'w+b') as f:
     65 + with open(filename, "w+b") as f:
    63 66   pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
    64 67   
    65 68   
    66 69  def save_json_report(filename: str, username: str, results: dict, report_type: str):
    67  - with open(filename, 'w', encoding='utf-8') as f:
     70 + with open(filename, "w", encoding="utf-8") as f:
    68 71   generate_json_report(username, results, f, report_type=report_type)
    69 72   
    70 73   
    71  -'''
     74 +"""
    72 75  REPORTS GENERATING
    73  -'''
     76 +"""
    74 77   
    75 78   
    76 79  def generate_report_template(is_pdf: bool):
    77 80   """
    78  - HTML/PDF template generation
     81 + HTML/PDF template generation
    79 82   """
    80 83   
    81 84   def get_resource_content(filename):
    82  - return open(os.path.join(maigret_path, 'resources', filename)).read()
     85 + return open(os.path.join(maigret_path, "resources", filename)).read()
    83 86   
    84 87   maigret_path = os.path.dirname(os.path.realpath(__file__))
    85 88   
    86 89   if is_pdf:
    87  - template_content = get_resource_content('simple_report_pdf.tpl')
    88  - css_content = get_resource_content('simple_report_pdf.css')
     90 + template_content = get_resource_content("simple_report_pdf.tpl")
     91 + css_content = get_resource_content("simple_report_pdf.css")
    89 92   else:
    90  - template_content = get_resource_content('simple_report.tpl')
     93 + template_content = get_resource_content("simple_report.tpl")
    91 94   css_content = None
    92 95   
    93 96   template = Template(template_content)
    94  - template.globals['title'] = CaseConverter.snake_to_title
    95  - template.globals['detect_link'] = enrich_link_str
     97 + template.globals["title"] = CaseConverter.snake_to_title # type: ignore
     98 + template.globals["detect_link"] = enrich_link_str # type: ignore
    96 99   return template, css_content
    97 100   
    98 101   
    skipped 1 lines
    100 103   brief_text = []
    101 104   usernames = {}
    102 105   extended_info_count = 0
    103  - tags = {}
    104  - supposed_data = {}
     106 + tags: Dict[str, int] = {}
     107 + supposed_data: Dict[str, Any] = {}
    105 108   
    106 109   first_seen = None
    107 110   
    108 111   for username, id_type, results in username_results:
    109 112   found_accounts = 0
    110 113   new_ids = []
    111  - usernames[username] = {'type': id_type}
     114 + usernames[username] = {"type": id_type}
    112 115   
    113 116   for website_name in results:
    114 117   dictionary = results[website_name]
    skipped 1 lines
    116 119   if not dictionary:
    117 120   continue
    118 121   
    119  - if dictionary.get('is_similar'):
     122 + if dictionary.get("is_similar"):
    120 123   continue
    121 124   
    122  - status = dictionary.get('status')
     125 + status = dictionary.get("status")
    123 126   if not status: # FIXME: currently in case of timeout
    124 127   continue
    125 128   
    126 129   if status.ids_data:
    127  - dictionary['ids_data'] = status.ids_data
     130 + dictionary["ids_data"] = status.ids_data
    128 131   extended_info_count += 1
    129 132   
    130 133   # detect first seen
    131  - created_at = status.ids_data.get('created_at')
     134 + created_at = status.ids_data.get("created_at")
    132 135   if created_at:
    133 136   if first_seen is None:
    134 137   first_seen = created_at
    skipped 3 lines
    138 141   new_time = parse_datetime_str(created_at)
    139 142   if new_time < known_time:
    140 143   first_seen = created_at
    141  - except:
    142  - logging.debug('Problems with converting datetime %s/%s', first_seen, created_at)
     144 + except Exception as e:
     145 + logging.debug(
     146 + "Problems with converting datetime %s/%s: %s",
     147 + first_seen,
     148 + created_at,
     149 + str(e),
     150 + )
    143 151   
    144 152   for k, v in status.ids_data.items():
    145 153   # suppose target data
    146  - field = 'fullname' if k == 'name' else k
    147  - if not field in supposed_data:
     154 + field = "fullname" if k == "name" else k
     155 + if field not in supposed_data:
    148 156   supposed_data[field] = []
    149 157   supposed_data[field].append(v)
    150 158   # suppose country
    151  - if k in ['country', 'locale']:
     159 + if k in ["country", "locale"]:
    152 160   try:
    153 161   if is_country_tag(k):
    154 162   tag = pycountry.countries.get(alpha_2=v).alpha_2.lower()
    155 163   else:
    156  - tag = pycountry.countries.search_fuzzy(v)[0].alpha_2.lower()
     164 + tag = pycountry.countries.search_fuzzy(v)[
     165 + 0
     166 + ].alpha_2.lower()
    157 167   # TODO: move countries to another struct
    158 168   tags[tag] = tags.get(tag, 0) + 1
    159 169   except Exception as e:
    160  - logging.debug('pycountry exception', exc_info=True)
     170 + logging.debug(
     171 + "Pycountry exception: %s", str(e), exc_info=True
     172 + )
    161 173   
    162  - new_usernames = dictionary.get('ids_usernames')
     174 + new_usernames = dictionary.get("ids_usernames")
    163 175   if new_usernames:
    164 176   for u, utype in new_usernames.items():
    165  - if not u in usernames:
     177 + if u not in usernames:
    166 178   new_ids.append((u, utype))
    167  - usernames[u] = {'type': utype}
     179 + usernames[u] = {"type": utype}
    168 180   
    169 181   if status.status == QueryStatus.CLAIMED:
    170 182   found_accounts += 1
    171  - dictionary['found'] = True
     183 + dictionary["found"] = True
    172 184   else:
    173 185   continue
    174 186   
    skipped 2 lines
    177 189   for t in status.tags:
    178 190   tags[t] = tags.get(t, 0) + 1
    179 191   
    180  - brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')
     192 + brief_text.append(
     193 + f"Search by {id_type} {username} returned {found_accounts} accounts."
     194 + )
    181 195   
    182 196   if new_ids:
    183 197   ids_list = []
    184 198   for u, t in new_ids:
    185  - ids_list.append(f'{u} ({t})' if t != 'username' else u)
    186  - brief_text.append(f'Found target\'s other IDs: ' + ', '.join(ids_list) + '.')
     199 + ids_list.append(f"{u} ({t})" if t != "username" else u)
     200 + brief_text.append("Found target's other IDs: " + ", ".join(ids_list) + ".")
    187 201   
    188  - brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
     202 + brief_text.append(f"Extended info extracted from {extended_info_count} accounts.")
    189 203   
    190  - brief = ' '.join(brief_text).strip()
     204 + brief = " ".join(brief_text).strip()
    191 205   tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
    192 206   
    193  - if 'global' in tags:
     207 + if "global" in tags:
    194 208   # remove tag 'global' useless for country detection
    195  - del tags['global']
     209 + del tags["global"]
    196 210   
    197 211   first_username = username_results[0][0]
    198 212   countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items()))
    skipped 2 lines
    201 215   filtered_supposed_data = filter_supposed_data(supposed_data)
    202 216   
    203 217   return {
    204  - 'username': first_username,
    205  - 'brief': brief,
    206  - 'results': username_results,
    207  - 'first_seen': first_seen,
    208  - 'interests_tuple_list': tuple_sort(interests_list),
    209  - 'countries_tuple_list': tuple_sort(countries_lists),
    210  - 'supposed_data': filtered_supposed_data,
    211  - 'generated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
     218 + "username": first_username,
     219 + "brief": brief,
     220 + "results": username_results,
     221 + "first_seen": first_seen,
     222 + "interests_tuple_list": tuple_sort(interests_list),
     223 + "countries_tuple_list": tuple_sort(countries_lists),
     224 + "supposed_data": filtered_supposed_data,
     225 + "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    212 226   }
    213 227   
    214 228   
    215 229  def generate_csv_report(username: str, results: dict, csvfile):
    216 230   writer = csv.writer(csvfile)
    217  - writer.writerow(['username',
    218  - 'name',
    219  - 'url_main',
    220  - 'url_user',
    221  - 'exists',
    222  - 'http_status'
    223  - ]
    224  - )
     231 + writer.writerow(
     232 + ["username", "name", "url_main", "url_user", "exists", "http_status"]
     233 + )
    225 234   for site in results:
    226  - writer.writerow([username,
    227  - site,
    228  - results[site]['url_main'],
    229  - results[site]['url_user'],
    230  - str(results[site]['status'].status),
    231  - results[site]['http_status'],
    232  - ])
     235 + writer.writerow(
     236 + [
     237 + username,
     238 + site,
     239 + results[site]["url_main"],
     240 + results[site]["url_user"],
     241 + str(results[site]["status"].status),
     242 + results[site]["http_status"],
     243 + ]
     244 + )
    233 245   
    234 246   
    235 247  def generate_txt_report(username: str, results: dict, file):
    skipped 6 lines
    242 254   if dictionary.get("status").status == QueryStatus.CLAIMED:
    243 255   exists_counter += 1
    244 256   file.write(dictionary["url_user"] + "\n")
    245  - file.write(f'Total Websites Username Detected On : {exists_counter}')
     257 + file.write(f"Total websites where the username was detected: {exists_counter}")
    246 258   
    247 259   
    248 260  def generate_json_report(username: str, results: dict, file, report_type):
    249  - exists_counter = 0
    250  - is_report_per_line = report_type.startswith('ndjson')
     261 + is_report_per_line = report_type.startswith("ndjson")
    251 262   all_json = {}
    252 263   
    253 264   for sitename in results:
    skipped 3 lines
    257 268   continue
    258 269   
    259 270   data = dict(site_result)
    260  - data['status'] = data['status'].json()
     271 + data["status"] = data["status"].json()
    261 272   
    262 273   if is_report_per_line:
    263  - data['sitename'] = sitename
    264  - file.write(json.dumps(data) + '\n')
     274 + data["sitename"] = sitename
     275 + file.write(json.dumps(data) + "\n")
    265 276   else:
    266 277   all_json[sitename] = data
    267 278   
    skipped 1 lines
    269 280   file.write(json.dumps(all_json))
    270 281   
    271 282   
    272  -'''
     283 +"""
    273 284  XMIND 8 Functions
    274  -'''
     285 +"""
    275 286   
    276 287   
    277 288  def save_xmind_report(filename, username, results):
    skipped 6 lines
    284 295   
    285 296   
    286 297  def design_sheet(sheet, username, results):
    287  - ##all tag list
    288 298   alltags = {}
    289 299   supposed_data = {}
    290 300   
    skipped 9 lines
    300 310   dictionary = results[website_name]
    301 311   
    302 312   if dictionary.get("status").status == QueryStatus.CLAIMED:
    303  - ## firsttime I found that entry
     313 + # first time this entry has been found
    304 314   for tag in dictionary.get("status").tags:
    305 315   if tag.strip() == "":
    306 316   continue
    skipped 22 lines
    329 339   # suppose target data
    330 340   if not isinstance(v, list):
    331 341   currentsublabel = userlink.addSubTopic()
    332  - field = 'fullname' if k == 'name' else k
    333  - if not field in supposed_data:
     342 + field = "fullname" if k == "name" else k
     343 + if field not in supposed_data:
    334 344   supposed_data[field] = []
    335 345   supposed_data[field].append(v)
    336 346   currentsublabel.setTitle("%s: %s" % (k, v))
    337 347   else:
    338 348   for currentval in v:
    339 349   currentsublabel = userlink.addSubTopic()
    340  - field = 'fullname' if k == 'name' else k
    341  - if not field in supposed_data:
     350 + field = "fullname" if k == "name" else k
     351 + if field not in supposed_data:
    342 352   supposed_data[field] = []
    343 353   supposed_data[field].append(currentval)
    344 354   currentsublabel.setTitle("%s: %s" % (k, currentval))
    345  - ### Add Supposed DATA
     355 + # add supposed data
    346 356   filterede_supposed_data = filter_supposed_data(supposed_data)
    347  - if (len(filterede_supposed_data) > 0):
     357 + if len(filterede_supposed_data) > 0:
    348 358   undefinedsection = root_topic1.addSubTopic()
    349 359   undefinedsection.setTitle("SUPPOSED DATA")
    350 360   for k, v in filterede_supposed_data.items():
    skipped 2 lines
    353 363   
    354 364   
    355 365  def check_supported_json_format(value):
    356  - if value and not value in SUPPORTED_JSON_REPORT_FORMATS:
    357  - raise ArgumentTypeError(f'JSON report type must be one of the following types: '
    358  - + ', '.join(SUPPORTED_JSON_REPORT_FORMATS))
     366 + if value and value not in SUPPORTED_JSON_REPORT_FORMATS:
     367 + raise ArgumentTypeError(
     368 + "JSON report type must be one of the following types: "
     369 + + ", ".join(SUPPORTED_JSON_REPORT_FORMATS)
     370 + )
    359 371   return value
    360 372   
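    A short reading sketch for the JSON reports produced by generate_json_report() above: the 'ndjson' type writes one JSON object per site and per line (with a 'sitename' field added), while 'simple' writes a single object keyed by site name. The file path below is only an example; the CLI builds the real name from the reports folder, the username, and the report type.

    import json

    # assuming an ndjson report was saved, e.g. via `-J ndjson` on the CLI
    with open("reports/report_alice_ndjson.json", encoding="utf-8") as f:
        for line in f:
            entry = json.loads(line)
            # entry["status"] is QueryResult.json(): username, site_name, url, status, ids, tags
            print(entry["sitename"], entry["status"]["status"], entry["status"]["url"])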
  • ■ ■ ■ ■ ■ ■
    maigret/resources/data.json
    skipped 12147 lines
    12148 12148   "us"
    12149 12149   ],
    12150 12150   "headers": {
    12151  - "authorization": "Bearer BQAEeuyBT6S535Anlx4wU-pfPjjgiE8r2e7j0eOSnwZjSvjFvQgDzxwV__03-WNbwxPKyGehoJ5pQCBwUqs"
     12151 + "authorization": "Bearer BQCe5Yx_Evl2m1Td_86SzknoVan7OZxN6y6WaR7xNrJb8vnZ5B7VZY401MdivLmCQcyv0LUkfo1M-15_m-E"
    12152 12152   },
    12153 12153   "errors": {
    12154 12154   "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
    skipped 1303 lines
    13458 13458   "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
    13459 13459   "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
    13460 13460   "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    13461  - "x-guest-token": "1387733472027070474"
     13461 + "x-guest-token": "1388029767388106752"
    13462 13462   },
    13463 13463   "errors": {
    13464 13464   "Bad guest token": "x-guest-token update required"
    skipped 196 lines
    13661 13661   "type": "vk_id",
    13662 13662   "checkType": "response_url",
    13663 13663   "alexaRank": 26,
     13664 + "source": "VK",
    13664 13665   "url": "https://vk.com/id{username}",
    13665 13666   "urlMain": "https://vk.com/",
    13666 13667   "usernameClaimed": "270433952",
    skipped 5 lines
    13672 13673   ],
    13673 13674   "checkType": "status_code",
    13674 13675   "alexaRank": 28938,
     13676 + "source": "VK",
    13675 13677   "url": "https://vkfaces.com/vk/user/{username}",
    13676 13678   "urlMain": "https://vkfaces.com",
    13677 13679   "usernameClaimed": "adam",
    skipped 157 lines
    13835 13837   "video"
    13836 13838   ],
    13837 13839   "headers": {
    13838  - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk2OTczNjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.yLRq0lhenTYfe0EKKJsk5HZJZt3ykUVNBGuiMCC5HR4"
     13840 + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk3NzM3NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.4O4QL4IsoiKl0Cz1310Qjo9WablDr5LIyMOPQgMS1XE"
    13839 13841   },
    13840 13842   "activation": {
    13841 13843   "url": "https://vimeo.com/_rv/viewer",
    skipped 2283 lines
    16125 16127   "gb",
    16126 16128   "uk"
    16127 16129   ],
    16128  - "checkType": "message",
    16129  - "absenceStrs": "The specified member cannot be found. Please enter a member's entire name.",
     16130 + "engine": "XenForo",
    16130 16131   "alexaRank": 12725,
    16131  - "url": "https://forums.overclockers.co.uk/members/?username={username}",
    16132 16132   "urlMain": "https://forums.overclockers.co.uk",
    16133 16133   "usernameClaimed": "adam",
    16134 16134   "usernameUnclaimed": "noonewouldeverusethis7"
    skipped 7612 lines
    23747 23747   ],
    23748 23748   "url": "https://opensea.io/accounts/{username}",
    23749 23749   "urlMain": "https://opensea.io",
     23750 + "usernameClaimed": "admin",
     23751 + "usernameUnclaimed": "noonewouldeverusethis7"
     23752 + },
     23753 + "SmiHub": {
     23754 + "checkType": "message",
     23755 + "presenseStrs": [
     23756 + "profile",
     23757 + "user-page",
     23758 + "user",
     23759 + " data-name=",
     23760 + "user__img"
     23761 + ],
     23762 + "absenceStrs": [
     23763 + "text-lg mb-3"
     23764 + ],
     23765 + "source": "Instagram",
     23766 + "url": "https://smihub.com/v/{username}",
     23767 + "urlMain": "https://smihub.com",
     23768 + "usernameClaimed": "blue",
     23769 + "usernameUnclaimed": "noonewouldeverusethis7"
     23770 + },
     23771 + "do100verno.info": {
     23772 + "checkType": "message",
     23773 + "presenseStrs": [
     23774 + "white-space: nowrap;"
     23775 + ],
     23776 + "absenceStrs": [
     23777 + "l-main",
     23778 + " l-mainDcL",
     23779 + " l-usrMenu"
     23780 + ],
     23781 + "url": "https://do100verno.info/card/{username}",
     23782 + "urlMain": "https://do100verno.info",
     23783 + "usernameClaimed": "ekostyle",
     23784 + "usernameUnclaimed": "noonewouldeverusethis7"
     23785 + },
     23786 + "www.kinokopilka.pro": {
     23787 + "checkType": "message",
     23788 + "presenseStrs": [
     23789 + "profile",
     23790 + "user",
     23791 + "people",
     23792 + "users",
     23793 + "/people"
     23794 + ],
     23795 + "url": "https://www.kinokopilka.pro/users/{username}",
     23796 + "urlMain": "https://www.kinokopilka.pro",
     23797 + "usernameClaimed": "admin",
     23798 + "usernameUnclaimed": "noonewouldeverusethis7"
     23799 + },
     23800 + "www.turpravda.com": {
     23801 + "checkType": "message",
     23802 + "presenseStrs": [
     23803 + "email",
     23804 + " name"
     23805 + ],
     23806 + "absenceStrs": [
     23807 + "Title",
     23808 + " Shortcut Icon",
     23809 + " submit"
     23810 + ],
     23811 + "url": "https://www.turpravda.com/profile/{username}",
     23812 + "urlMain": "https://www.turpravda.com",
    23750 23813   "usernameClaimed": "admin",
    23751 23814   "usernameUnclaimed": "noonewouldeverusethis7"
    23752 23815   }
    skipped 158 lines
  • ■ ■ ■ ■ ■ ■
    maigret/result.py
    skipped 9 lines
    10 10   
    11 11   Describes status of query about a given username.
    12 12   """
     13 + 
    13 14   CLAIMED = "Claimed" # Username Detected
    14 15   AVAILABLE = "Available" # Username Not Detected
    15 16   UNKNOWN = "Unknown" # Error Occurred While Trying To Detect Username
    skipped 11 lines
    27 28   return self.value
    28 29   
    29 30   
    30  -class QueryResult():
     31 +class QueryResult:
    31 32   """Query Result Object.
    32 33   
    33 34   Describes result of query about a given username.
    34 35   """
    35 36   
    36  - def __init__(self, username, site_name, site_url_user, status, ids_data=None,
    37  - query_time=None, context=None, error=None, tags=[]):
     37 + def __init__(
     38 + self,
     39 + username,
     40 + site_name,
     41 + site_url_user,
     42 + status,
     43 + ids_data=None,
     44 + query_time=None,
     45 + context=None,
     46 + error=None,
     47 + tags=[],
     48 + ):
    38 49   """Create Query Result Object.
    39 50   
    40 51   Contains information about a specific method of detecting usernames on
    skipped 36 lines
    77 88   
    78 89   def json(self):
    79 90   return {
    80  - 'username': self.username,
    81  - 'site_name': self.site_name,
    82  - 'url': self.site_url_user,
    83  - 'status': str(self.status),
    84  - 'ids': self.ids_data or {},
    85  - 'tags': self.tags,
     91 + "username": self.username,
     92 + "site_name": self.site_name,
     93 + "url": self.site_url_user,
     94 + "status": str(self.status),
     95 + "ids": self.ids_data or {},
     96 + "tags": self.tags,
    86 97   }
    87 98   
    88 99   def is_found(self):
    skipped 19 lines
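    For illustration, roughly what the reformatted QueryResult.json() returns (all values below are example data, not output from a real check):

    from maigret.result import QueryResult, QueryStatus

    result = QueryResult(
        username="alice",                          # example values only
        site_name="GitHub",
        site_url_user="https://github.com/alice",
        status=QueryStatus.CLAIMED,
        tags=["coding"],
    )
    print(result.json())
    # {'username': 'alice', 'site_name': 'GitHub', 'url': 'https://github.com/alice',
    #  'status': 'Claimed', 'ids': {}, 'tags': ['coding']}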
  • ■ ■ ■ ■ ■ ■
    maigret/sites.py
    1  -# -*- coding: future_annotations -*-
     1 +# ****************************** -*-
    2 2  """Maigret Sites Information"""
    3 3  import copy
    4 4  import json
    5 5  import sys
     6 +from typing import Optional
    6 7   
    7 8  import requests
    8 9   
    skipped 1 lines
    10 11   
    11 12  # TODO: move to data.json
    12 13  SUPPORTED_TAGS = [
    13  - 'gaming', 'coding', 'photo', 'music', 'blog', 'finance', 'freelance', 'dating',
    14  - 'tech', 'forum', 'porn', 'erotic', 'webcam', 'video', 'movies', 'hacking', 'art',
    15  - 'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport',
    16  - 'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified',
    17  - 'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent',
    18  - 'science', 'medicine', 'reading', 'stock',
     14 + "gaming",
     15 + "coding",
     16 + "photo",
     17 + "music",
     18 + "blog",
     19 + "finance",
     20 + "freelance",
     21 + "dating",
     22 + "tech",
     23 + "forum",
     24 + "porn",
     25 + "erotic",
     26 + "webcam",
     27 + "video",
     28 + "movies",
     29 + "hacking",
     30 + "art",
     31 + "discussion",
     32 + "sharing",
     33 + "writing",
     34 + "wiki",
     35 + "business",
     36 + "shopping",
     37 + "sport",
     38 + "books",
     39 + "news",
     40 + "documents",
     41 + "travel",
     42 + "maps",
     43 + "hobby",
     44 + "apps",
     45 + "classified",
     46 + "career",
     47 + "geosocial",
     48 + "streaming",
     49 + "education",
     50 + "networking",
     51 + "torrent",
     52 + "science",
     53 + "medicine",
     54 + "reading",
     55 + "stock",
    19 56  ]
    20 57   
    21 58   
    skipped 10 lines
    32 69   
    33 70  class MaigretSite:
    34 71   NOT_SERIALIZABLE_FIELDS = [
    35  - 'name',
    36  - 'engineData',
    37  - 'requestFuture',
    38  - 'detectedEngine',
    39  - 'engineObj',
    40  - 'stats',
    41  - 'urlRegexp',
     72 + "name",
     73 + "engineData",
     74 + "requestFuture",
     75 + "detectedEngine",
     76 + "engineObj",
     77 + "stats",
     78 + "urlRegexp",
    42 79   ]
    43 80   
    44 81   def __init__(self, name, information):
    skipped 4 lines
    49 86   self.ignore403 = False
    50 87   self.tags = []
    51 88   
    52  - self.type = 'username'
     89 + self.type = "username"
    53 90   self.headers = {}
    54 91   self.errors = {}
    55 92   self.activation = {}
    56  - self.url_subpath = ''
     93 + self.url_subpath = ""
    57 94   self.regex_check = None
    58 95   self.url_probe = None
    59  - self.check_type = ''
    60  - self.request_head_only = ''
     96 + self.check_type = ""
     97 + self.request_head_only = ""
    61 98   self.get_params = {}
    62 99   
    63 100   self.presense_strs = []
    skipped 20 lines
    84 121   return f"{self.name} ({self.url_main})"
    85 122   
    86 123   def update_detectors(self):
    87  - if 'url' in self.__dict__:
     124 + if "url" in self.__dict__:
    88 125   url = self.url
    89  - for group in ['urlMain', 'urlSubpath']:
     126 + for group in ["urlMain", "urlSubpath"]:
    90 127   if group in url:
    91  - url = url.replace('{' + group + '}', self.__dict__[CaseConverter.camel_to_snake(group)])
     128 + url = url.replace(
     129 + "{" + group + "}",
     130 + self.__dict__[CaseConverter.camel_to_snake(group)],
     131 + )
    92 132   
    93 133   self.url_regexp = URLMatcher.make_profile_url_regexp(url, self.regex_check)
    94 134   
    95  - def detect_username(self, url: str) -> str:
     135 + def detect_username(self, url: str) -> Optional[str]:
    96 136   if self.url_regexp:
    97 137   match_groups = self.url_regexp.match(url)
    98 138   if match_groups:
    99  - return match_groups.groups()[-1].rstrip('/')
     139 + return match_groups.groups()[-1].rstrip("/")
    100 140   
    101 141   return None
    102 142   
    103 143   @property
    104 144   def pretty_name(self):
    105 145   if self.source:
    106  - return f'{self.name} [{self.source}]'
     146 + return f"{self.name} [{self.source}]"
    107 147   return self.name
    108 148   
    109 149   @property
    skipped 3 lines
    113 153   # convert to camelCase
    114 154   field = CaseConverter.snake_to_camel(k)
    115 155   # strip empty elements
    116  - if v in (False, '', [], {}, None, sys.maxsize, 'username'):
     156 + if v in (False, "", [], {}, None, sys.maxsize, "username"):
    117 157   continue
    118 158   if field in self.NOT_SERIALIZABLE_FIELDS:
    119 159   continue
    skipped 1 lines
    121 161   
    122 162   return result
    123 163   
    124  - def update(self, updates: dict) -> MaigretSite:
     164 + def update(self, updates: "dict") -> "MaigretSite":
    125 165   self.__dict__.update(updates)
    126 166   self.update_detectors()
    127 167   
    128 168   return self
    129 169   
    130  - def update_from_engine(self, engine: MaigretEngine) -> MaigretSite:
     170 + def update_from_engine(self, engine: MaigretEngine) -> "MaigretSite":
    131 171   engine_data = engine.site
    132 172   for k, v in engine_data.items():
    133 173   field = CaseConverter.camel_to_snake(k)
    skipped 11 lines
    145 185   
    146 186   return self
    147 187   
    148  - def strip_engine_data(self) -> MaigretSite:
     188 + def strip_engine_data(self) -> "MaigretSite":
    149 189   if not self.engine_obj:
    150 190   return self
    151 191   
    skipped 38 lines
    190 230   def sites_dict(self):
    191 231   return {site.name: site for site in self._sites}
    192 232   
    193  - def ranked_sites_dict(self, reverse=False, top=sys.maxsize, tags=[], names=[],
    194  - disabled=True, id_type='username'):
     233 + def ranked_sites_dict(
     234 + self,
     235 + reverse=False,
     236 + top=sys.maxsize,
     237 + tags=[],
     238 + names=[],
     239 + disabled=True,
     240 + id_type="username",
     241 + ):
    195 242   """
    196  - Ranking and filtering of the sites list
     243 + Ranking and filtering of the sites list
    197 244   """
    198 245   normalized_names = list(map(str.lower, names))
    199 246   normalized_tags = list(map(str.lower, tags))
    200 247   
    201 248   is_name_ok = lambda x: x.name.lower() in normalized_names
    202 249   is_source_ok = lambda x: x.source and x.source.lower() in normalized_names
    203  - is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
     250 + is_engine_ok = (
     251 + lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
     252 + )
    204 253   is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
    205  - is_disabled_needed = lambda x: not x.disabled or ('disabled' in tags or disabled)
     254 + is_disabled_needed = lambda x: not x.disabled or (
     255 + "disabled" in tags or disabled
     256 + )
    206 257   is_id_type_ok = lambda x: x.type == id_type
    207 258   
    208 259   filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
    209 260   filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
    210 261   
    211  - filter_fun = lambda x: filter_tags_engines_fun(x) and filter_names_fun(x) \
    212  - and is_disabled_needed(x) and is_id_type_ok(x)
     262 + filter_fun = (
     263 + lambda x: filter_tags_engines_fun(x)
     264 + and filter_names_fun(x)
     265 + and is_disabled_needed(x)
     266 + and is_id_type_ok(x)
     267 + )
    213 268   
    214 269   filtered_list = [s for s in self.sites if filter_fun(s)]
    215 270   
    216  - sorted_list = sorted(filtered_list, key=lambda x: x.alexa_rank, reverse=reverse)[:top]
     271 + sorted_list = sorted(
     272 + filtered_list, key=lambda x: x.alexa_rank, reverse=reverse
     273 + )[:top]
    217 274   return {site.name: site for site in sorted_list}
    218 275   
    219 276   @property
    skipped 4 lines
    224 281   def engines_dict(self):
    225 282   return {engine.name: engine for engine in self._engines}
    226 283   
    227  - def update_site(self, site: MaigretSite) -> MaigretDatabase:
     284 + def update_site(self, site: MaigretSite) -> "MaigretDatabase":
    228 285   for s in self._sites:
    229 286   if s.name == site.name:
    230 287   s = site
    skipped 2 lines
    233 290   self._sites.append(site)
    234 291   return self
    235 292   
    236  - def save_to_file(self, filename: str) -> MaigretDatabase:
     293 + def save_to_file(self, filename: str) -> "MaigretDatabase":
    237 294   db_data = {
    238  - 'sites': {site.name: site.strip_engine_data().json for site in self._sites},
    239  - 'engines': {engine.name: engine.json for engine in self._engines},
     295 + "sites": {site.name: site.strip_engine_data().json for site in self._sites},
     296 + "engines": {engine.name: engine.json for engine in self._engines},
    240 297   }
    241 298   
    242 299   json_data = json.dumps(db_data, indent=4)
    243 300   
    244  - with open(filename, 'w') as f:
     301 + with open(filename, "w") as f:
    245 302   f.write(json_data)
    246 303   
    247 304   return self
    248 305   
    249  - def load_from_json(self, json_data: dict) -> MaigretDatabase:
     306 + def load_from_json(self, json_data: dict) -> "MaigretDatabase":
    250 307   # Add all of site information from the json file to internal site list.
    251 308   site_data = json_data.get("sites", {})
    252 309   engines_data = json_data.get("engines", {})
    skipped 5 lines
    258 315   try:
    259 316   maigret_site = MaigretSite(site_name, site_data[site_name])
    260 317   
    261  - engine = site_data[site_name].get('engine')
     318 + engine = site_data[site_name].get("engine")
    262 319   if engine:
    263 320   maigret_site.update_from_engine(self.engines_dict[engine])
    264 321   
    265 322   self._sites.append(maigret_site)
    266 323   except KeyError as error:
    267  - raise ValueError(f"Problem parsing json content for site {site_name}: "
    268  - f"Missing attribute {str(error)}."
    269  - )
     324 + raise ValueError(
     325 + f"Problem parsing json content for site {site_name}: "
     326 + f"Missing attribute {str(error)}."
     327 + )
    270 328   
    271 329   return self
    272 330   
    273  - def load_from_str(self, db_str: str) -> MaigretDatabase:
     331 + def load_from_str(self, db_str: "str") -> "MaigretDatabase":
    274 332   try:
    275 333   data = json.loads(db_str)
    276 334   except Exception as error:
    277  - raise ValueError(f"Problem parsing json contents from str"
    278  - f"'{db_str[:50]}'...: {str(error)}."
    279  - )
     335 + raise ValueError(
     336 + f"Problem parsing json contents from str"
     337 + f"'{db_str[:50]}'...: {str(error)}."
     338 + )
    280 339   
    281 340   return self.load_from_json(data)
    282 341   
    283  - def load_from_url(self, url: str) -> MaigretDatabase:
    284  - is_url_valid = url.startswith('http://') or url.startswith('https://')
     342 + def load_from_url(self, url: str) -> "MaigretDatabase":
     343 + is_url_valid = url.startswith("http://") or url.startswith("https://")
    285 344   
    286 345   if not is_url_valid:
    287 346   raise FileNotFoundError(f"Invalid data file URL '{url}'.")
    skipped 1 lines
    289 348   try:
    290 349   response = requests.get(url=url)
    291 350   except Exception as error:
    292  - raise FileNotFoundError(f"Problem while attempting to access "
    293  - f"data file URL '{url}': "
    294  - f"{str(error)}"
    295  - )
     351 + raise FileNotFoundError(
     352 + f"Problem while attempting to access "
     353 + f"data file URL '{url}': "
     354 + f"{str(error)}"
     355 + )
    296 356   
    297 357   if response.status_code == 200:
    298 358   try:
    299 359   data = response.json()
    300 360   except Exception as error:
    301  - raise ValueError(f"Problem parsing json contents at "
    302  - f"'{url}': {str(error)}."
    303  - )
     361 + raise ValueError(
     362 + f"Problem parsing json contents at " f"'{url}': {str(error)}."
     363 + )
    304 364   else:
    305  - raise FileNotFoundError(f"Bad response while accessing "
    306  - f"data file URL '{url}'."
    307  - )
     365 + raise FileNotFoundError(
     366 + f"Bad response while accessing " f"data file URL '{url}'."
     367 + )
    308 368   
    309 369   return self.load_from_json(data)
    310 370   
    311  - def load_from_file(self, filename: str) -> MaigretDatabase:
     371 + def load_from_file(self, filename: "str") -> "MaigretDatabase":
    312 372   try:
    313  - with open(filename, 'r', encoding='utf-8') as file:
     373 + with open(filename, "r", encoding="utf-8") as file:
    314 374   try:
    315 375   data = json.load(file)
    316 376   except Exception as error:
    317  - raise ValueError(f"Problem parsing json contents from "
    318  - f"file '{filename}': {str(error)}."
    319  - )
     377 + raise ValueError(
     378 + f"Problem parsing json contents from "
     379 + f"file '{filename}': {str(error)}."
     380 + )
    320 381   except FileNotFoundError as error:
    321  - raise FileNotFoundError(f"Problem while attempting to access "
    322  - f"data file '{filename}'."
    323  - )
     382 + raise FileNotFoundError(
     383 + f"Problem while attempting to access " f"data file '{filename}'."
     384 + ) from error
    324 385   
    325 386   return self.load_from_json(data)
    326 387   
    skipped 1 lines
    328 389   sites = sites_dict or self.sites_dict
    329 390   found_flags = {}
    330 391   for _, s in sites.items():
    331  - if 'presense_flag' in s.stats:
    332  - flag = s.stats['presense_flag']
     392 + if "presense_flag" in s.stats:
     393 + flag = s.stats["presense_flag"]
    333 394   found_flags[flag] = found_flags.get(flag, 0) + 1
    334 395   
    335 396   return found_flags
    skipped 2 lines
    338 399   if not sites_dict:
    339 400   sites_dict = self.sites_dict()
    340 401   
    341  - output = ''
     402 + output = ""
    342 403   disabled_count = 0
    343 404   total_count = len(sites_dict)
    344 405   urls = {}
    skipped 4 lines
    349 410   disabled_count += 1
    350 411   
    351 412   url = URLMatcher.extract_main_part(site.url)
    352  - if url.startswith('{username}'):
    353  - url = 'SUBDOMAIN'
    354  - elif url == '':
    355  - url = f'{site.url} ({site.engine})'
     413 + if url.startswith("{username}"):
     414 + url = "SUBDOMAIN"
     415 + elif url == "":
     416 + url = f"{site.url} ({site.engine})"
    356 417   else:
    357  - parts = url.split('/')
    358  - url = '/' + '/'.join(parts[1:])
     418 + parts = url.split("/")
     419 + url = "/" + "/".join(parts[1:])
    359 420   
    360 421   urls[url] = urls.get(url, 0) + 1
    361 422   
    362 423   if not site.tags:
    363  - tags['NO_TAGS'] = tags.get('NO_TAGS', 0) + 1
     424 + tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
    364 425   
    365 426   for tag in site.tags:
    366 427   if is_country_tag(tag):
    skipped 1 lines
    368 429   continue
    369 430   tags[tag] = tags.get(tag, 0) + 1
    370 431   
    371  - output += f'Enabled/total sites: {total_count - disabled_count}/{total_count}\n'
    372  - output += 'Top sites\' profile URLs:\n'
     432 + output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
     433 + output += "Top sites' profile URLs:\n"
    373 434   for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
    374 435   if count == 1:
    375 436   break
    376  - output += f'{count}\t{url}\n'
    377  - output += 'Top sites\' tags:\n'
     437 + output += f"{count}\t{url}\n"
     438 + output += "Top sites' tags:\n"
    378 439   for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True):
    379  - mark = ''
    380  - if not tag in SUPPORTED_TAGS:
    381  - mark = ' (non-standard)'
    382  - output += f'{count}\t{tag}{mark}\n'
     440 + mark = ""
     441 + if tag not in SUPPORTED_TAGS:
     442 + mark = " (non-standard)"
     443 + output += f"{count}\t{tag}{mark}\n"
    383 444   
    384 445   return output
    385 446   
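    A filtering sketch for the reformatted ranked_sites_dict(), following the same call pattern maigret.py uses above; the database path and tag are example values:

    from maigret.sites import MaigretDatabase

    db = MaigretDatabase().load_from_file("maigret/resources/data.json")
    top_coding_sites = db.ranked_sites_dict(
        top=100,              # keep the 100 best sites by Alexa rank
        tags=["coding"],      # only sites tagged "coding" (or with a matching engine)
        disabled=False,       # drop sites marked as disabled
        id_type="username",   # only username-type checks
    )
    print(len(top_coding_sites), list(top_coding_sites)[:5])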
  • ■ ■ ■ ■ ■ ■
    maigret/submit.py
     1 +import asyncio
    1 2  import difflib
     3 +import re
    2 4   
    3 5  import requests
    4 6   
    5  -from .checking import *
     7 +from .activation import import_aiohttp_cookies
     8 +from .checking import maigret
     9 +from .result import QueryStatus
     10 +from .sites import MaigretDatabase, MaigretSite
    6 11  from .utils import get_random_user_agent
    7 12   
    8 13   
    9  -DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
    10  - "birthday", "репутация", "информация", "e-mail"]
     14 +DESIRED_STRINGS = [
     15 + "username",
     16 + "not found",
     17 + "пользователь",
     18 + "profile",
     19 + "lastname",
     20 + "firstname",
     21 + "biography",
     22 + "birthday",
     23 + "репутация",
     24 + "информация",
     25 + "e-mail",
     26 +]
    11 27   
    12  -SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
     28 +SUPPOSED_USERNAMES = ["alex", "god", "admin", "red", "blue", "john"]
    13 29   
    14 30  HEADERS = {
    15  - 'User-Agent': get_random_user_agent(),
     31 + "User-Agent": get_random_user_agent(),
    16 32  }
    17 33   
    18 34  RATIO = 0.6
    19 35  TOP_FEATURES = 5
    20  -URL_RE = re.compile(r'https?://(www\.)?')
     36 +URL_RE = re.compile(r"https?://(www\.)?")
    21 37   
    22 38   
    23 39  def get_match_ratio(x):
    24  - return round(max([
    25  - difflib.SequenceMatcher(a=x.lower(), b=y).ratio()
    26  - for y in DESIRED_STRINGS
    27  - ]), 2)
     40 + return round(
     41 + max(
     42 + [difflib.SequenceMatcher(a=x.lower(), b=y).ratio() for y in DESIRED_STRINGS]
     43 + ),
     44 + 2,
     45 + )
    28 46   
    29 47   
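    Rough illustration of get_match_ratio() above: it compares the lower-cased input against every entry of DESIRED_STRINGS with difflib and keeps the best ratio, rounded to two decimals (inputs below are arbitrary examples).

    from maigret.submit import get_match_ratio

    print(get_match_ratio("Username"))    # 1.0 - exact match with "username"
    print(get_match_ratio("First-Name"))  # high, close to "firstname"
    print(get_match_ratio("<!DOCTYPE>"))  # low, nothing similar in DESIRED_STRINGS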
    30 48  def extract_mainpage_url(url):
    31  - return '/'.join(url.split('/', 3)[:3])
     49 + return "/".join(url.split("/", 3)[:3])
    32 50   
    33 51   
    34 52  async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
    35 53   changes = {
    36  - 'disabled': False,
     54 + "disabled": False,
    37 55   }
    38 56   
    39 57   check_data = [
    skipped 1 lines
    41 59   (site.username_unclaimed, QueryStatus.AVAILABLE),
    42 60   ]
    43 61   
    44  - logger.info(f'Checking {site.name}...')
     62 + logger.info(f"Checking {site.name}...")
    45 63   
    46 64   for username, status in check_data:
    47 65   results_dict = await maigret(
    skipped 10 lines
    58 76   # TODO: implement proper checking
    59 77   if site.name not in results_dict:
    60 78   logger.info(results_dict)
    61  - changes['disabled'] = True
     79 + changes["disabled"] = True
    62 80   continue
    63 81   
    64  - result = results_dict[site.name]['status']
     82 + result = results_dict[site.name]["status"]
    65 83   
    66 84   site_status = result.status
    67 85   
    skipped 2 lines
    70 88   msgs = site.absence_strs
    71 89   etype = site.check_type
    72 90   logger.warning(
    73  - f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
     91 + f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}"
     92 + )
    74 93   # don't disable in case of available username
    75 94   if status == QueryStatus.CLAIMED:
    76  - changes['disabled'] = True
     95 + changes["disabled"] = True
    77 96   elif status == QueryStatus.CLAIMED:
    78  - logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
     97 + logger.warning(
     98 + f"Not found `{username}` in {site.name}, must be claimed"
     99 + )
    79 100   logger.info(results_dict[site.name])
    80  - changes['disabled'] = True
     101 + changes["disabled"] = True
    81 102   else:
    82  - logger.warning(f'Found `{username}` in {site.name}, must be available')
     103 + logger.warning(f"Found `{username}` in {site.name}, must be available")
    83 104   logger.info(results_dict[site.name])
    84  - changes['disabled'] = True
     105 + changes["disabled"] = True
    85 106   
    86  - logger.info(f'Site {site.name} checking is finished')
     107 + logger.info(f"Site {site.name} checking is finished")
    87 108   
    88 109   return changes
    89 110   
    skipped 3 lines
    93 114   r = requests.get(url_mainpage)
    94 115   except Exception as e:
    95 116   print(e)
    96  - print('Some error while checking main page')
     117 + print("Some error while checking main page")
    97 118   return None
    98 119   
    99  - for e in db.engines:
    100  - strs_to_check = e.__dict__.get('presenseStrs')
     120 + for engine in db.engines:
     121 + strs_to_check = engine.__dict__.get("presenseStrs")
    101 122   if strs_to_check and r and r.text:
    102 123   all_strs_in_response = True
    103 124   for s in strs_to_check:
    104  - if not s in r.text:
     125 + if s not in r.text:
    105 126   all_strs_in_response = False
    106 127   if all_strs_in_response:
    107  - engine_name = e.__dict__.get('name')
    108  - print(f'Detected engine {engine_name} for site {url_mainpage}')
     128 + engine_name = engine.__dict__.get("name")
     129 + print(f"Detected engine {engine_name} for site {url_mainpage}")
    109 130   
    110 131   sites = []
    111 132   for u in SUPPOSED_USERNAMES:
    112 133   site_data = {
    113  - 'urlMain': url_mainpage,
    114  - 'name': url_mainpage.split('//')[0],
    115  - 'engine': engine_name,
    116  - 'usernameClaimed': u,
    117  - 'usernameUnclaimed': 'noonewouldeverusethis7',
     134 + "urlMain": url_mainpage,
     135 + "name": url_mainpage.split("//")[0],
     136 + "engine": engine_name,
     137 + "usernameClaimed": u,
     138 + "usernameUnclaimed": "noonewouldeverusethis7",
    118 139   }
    119 140   
    120  - maigret_site = MaigretSite(url_mainpage.split('/')[-1], site_data)
     141 + maigret_site = MaigretSite(url_mainpage.split("/")[-1], site_data)
    121 142   maigret_site.update_from_engine(db.engines_dict[engine_name])
    122 143   sites.append(maigret_site)
    123 144   
    skipped 2 lines
    126 147   return None
    127 148   
    128 149   
    129  -async def check_features_manually(db, url_exists, url_mainpage, cookie_file, logger, redirects=True):
    130  - url_parts = url_exists.split('/')
     150 +async def check_features_manually(
     151 + db, url_exists, url_mainpage, cookie_file, logger, redirects=True
     152 +):
     153 + url_parts = url_exists.split("/")
    131 154   supposed_username = url_parts[-1]
    132  - new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
     155 + new_name = input(
     156 + f'Is "{supposed_username}" a valid username? If not, write it manually: '
     157 + )
    133 158   if new_name:
    134 159   supposed_username = new_name
    135  - non_exist_username = 'noonewouldeverusethis7'
     160 + non_exist_username = "noonewouldeverusethis7"
    136 161   
    137  - url_user = url_exists.replace(supposed_username, '{username}')
     162 + url_user = url_exists.replace(supposed_username, "{username}")
    138 163   url_not_exists = url_exists.replace(supposed_username, non_exist_username)
    139 164   
    140 165   # cookies
    skipped 2 lines
    143 168   cookie_jar = await import_aiohttp_cookies(cookie_file)
    144 169   cookie_dict = {c.key: c.value for c in cookie_jar}
    145 170   
    146  - exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
     171 + exists_resp = requests.get(
     172 + url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects
     173 + )
    147 174   logger.debug(exists_resp.status_code)
    148 175   logger.debug(exists_resp.text)
    149 176   
    150  - non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
     177 + non_exists_resp = requests.get(
     178 + url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects
     179 + )
    151 180   logger.debug(non_exists_resp.status_code)
    152 181   logger.debug(non_exists_resp.text)
    153  - 
    154 182   
    155 183   a = exists_resp.text
    156 184   b = non_exists_resp.text
    skipped 5 lines
    162 190   b_minus_a = tokens_b.difference(tokens_a)
    163 191   
    164 192   if len(a_minus_b) == len(b_minus_a) == 0:
    165  - print('The pages for existing and non-existing account are the same!')
     193 + print("The pages for existing and non-existing account are the same!")
    166 194   
    167  - top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
     195 + top_features_count = int(
     196 + input(f"Specify count of features to extract [default {TOP_FEATURES}]: ")
     197 + or TOP_FEATURES
     198 + )
    168 199   
    169  - presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]
     200 + presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[
     201 + :top_features_count
     202 + ]
    170 203   
    171  - print('Detected text features of existing account: ' + ', '.join(presence_list))
    172  - features = input('If features was not detected correctly, write it manually: ')
     204 + print("Detected text features of existing account: " + ", ".join(presence_list))
     205 + features = input("If features was not detected correctly, write it manually: ")
    173 206   
    174 207   if features:
    175  - presence_list = features.split(',')
     208 + presence_list = features.split(",")
    176 209   
    177  - absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[:top_features_count]
    178  - print('Detected text features of non-existing account: ' + ', '.join(absence_list))
    179  - features = input('If features was not detected correctly, write it manually: ')
     210 + absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
     211 + :top_features_count
     212 + ]
     213 + print("Detected text features of non-existing account: " + ", ".join(absence_list))
     214 + features = input("If features was not detected correctly, write it manually: ")
    180 215   
    181 216   if features:
    182  - absence_list = features.split(',')
     217 + absence_list = features.split(",")
    183 218   
    184 219   site_data = {
    185  - 'absenceStrs': absence_list,
    186  - 'presenseStrs': presence_list,
    187  - 'url': url_user,
    188  - 'urlMain': url_mainpage,
    189  - 'usernameClaimed': supposed_username,
    190  - 'usernameUnclaimed': non_exist_username,
    191  - 'checkType': 'message',
     220 + "absenceStrs": absence_list,
     221 + "presenseStrs": presence_list,
     222 + "url": url_user,
     223 + "urlMain": url_mainpage,
     224 + "usernameClaimed": supposed_username,
     225 + "usernameUnclaimed": non_exist_username,
     226 + "checkType": "message",
    192 227   }
    193 228   
    194  - site = MaigretSite(url_mainpage.split('/')[-1], site_data)
     229 + site = MaigretSite(url_mainpage.split("/")[-1], site_data)
    195 230   return site
    196 231   
    197 232   
    198 233  async def submit_dialog(db, url_exists, cookie_file, logger):
    199  - domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
    200  - domain_raw = domain_raw.split('/')[0]
     234 + domain_raw = URL_RE.sub("", url_exists).strip().strip("/")
     235 + domain_raw = domain_raw.split("/")[0]
    201 236   
    202 237   # check for existence
    203 238   matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
    204 239   
    205 240   if matched_sites:
    206  - print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
    207  - status = lambda s: '(disabled)' if s.disabled else ''
    208  - url_block = lambda s: f'\n\t{s.url_main}\n\t{s.url}'
    209  - print('\n'.join([f'{site.name} {status(site)}{url_block(site)}' for site in matched_sites]))
     241 + print(
     242 + f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
     243 + )
     244 + status = lambda s: "(disabled)" if s.disabled else ""
     245 + url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
     246 + print(
     247 + "\n".join(
     248 + [
     249 + f"{site.name} {status(site)}{url_block(site)}"
     250 + for site in matched_sites
     251 + ]
     252 + )
     253 + )
    210 254   
    211  - if input(f'Do you want to continue? [yN] ').lower() in 'n':
     255 + if input("Do you want to continue? [yN] ").lower() in "n":
    212 256   return False
    213 257   
    214 258   url_mainpage = extract_mainpage_url(url_exists)
    215 259   
    216 260   sites = await detect_known_engine(db, url_exists, url_mainpage)
    217 261   if not sites:
    218  - print('Unable to detect site engine, lets generate checking features')
    219  - sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file, logger)]
     262 + print("Unable to detect site engine, lets generate checking features")
     263 + sites = [
     264 + await check_features_manually(
     265 + db, url_exists, url_mainpage, cookie_file, logger
     266 + )
     267 + ]
    220 268   
    221 269   logger.debug(sites[0].__dict__)
    222 270   
    skipped 4 lines
    227 275   for s in sites:
    228 276   chosen_site = s
    229 277   result = await site_self_check(s, logger, sem, db)
    230  - if not result['disabled']:
     278 + if not result["disabled"]:
    231 279   found = True
    232 280   break
    233 281   
    234 282   if not found:
    235  - print(f'Sorry, we couldn\'t find params to detect account presence/absence in {chosen_site.name}.')
    236  - print('Try to run this mode again and increase features count or choose others.')
     283 + print(
     284 + f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}."
     285 + )
     286 + print(
     287 + "Try to run this mode again and increase features count or choose others."
     288 + )
    237 289   else:
    238  - if input(f'Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y':
     290 + if (
     291 + input(
     292 + f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
     293 + ).lower()
     294 + in "y"
     295 + ):
    239 296   logger.debug(chosen_site.json)
    240 297   site_data = chosen_site.strip_engine_data()
    241 298   logger.debug(site_data.json)
    skipped 5 lines
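End to end, submit_dialog tries detect_known_engine first, falls back to check_features_manually, validates the candidate with site_self_check, and finally offers to store the new entry. A minimal driver sketch (the database path, example URL, and logger setup are assumptions for illustration; load_from_file is the loader from the sites module):

    import asyncio
    import logging

    from maigret.sites import MaigretDatabase
    from maigret.submit import submit_dialog

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("maigret-submit")

    # Load the bundled site database (adjust the path to your checkout).
    db = MaigretDatabase().load_from_file("maigret/resources/data.json")

    # Interactive: prompts for the username, self-checks the generated site
    # entry and asks whether to save it into the database.
    asyncio.run(submit_dialog(db, "https://example.com/user/alex", None, logger))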
  • ■ ■ ■ ■ ■ ■
    maigret/types.py
    skipped 3 lines
    4 4  # search query
    5 5  QueryDraft = Tuple[Callable, Any, Any]
    6 6   
    7  -# error got as a result of completed search query
    8  -class CheckError:
    9  - _type = 'Unknown'
    10  - _desc = ''
    11  - 
    12  - def __init__(self, typename, desc=''):
    13  - self._type = typename
    14  - self._desc = desc
    15  - 
    16  - def __str__(self):
    17  - if not self._desc:
    18  - return f'{self._type} error'
    19  - 
    20  - return f'{self._type} error: {self._desc}'
    21  - 
    22  - @property
    23  - def type(self):
    24  - return self._type
    25  -
    26  - @property
    27  - def desc(self):
    28  - return self._desc
    29  - 
  • ■ ■ ■ ■ ■ ■
    maigret/utils.py
    skipped 2 lines
    3 3   
    4 4   
    5 5  DEFAULT_USER_AGENTS = [
    6  - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
     6 + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
    7 7  ]
    8 8   
    9 9   
    10 10  class CaseConverter:
    11 11   @staticmethod
    12 12   def camel_to_snake(camelcased_string: str) -> str:
    13  - return re.sub(r'(?<!^)(?=[A-Z])', '_', camelcased_string).lower()
     13 + return re.sub(r"(?<!^)(?=[A-Z])", "_", camelcased_string).lower()
    14 14   
    15 15   @staticmethod
    16 16   def snake_to_camel(snakecased_string: str) -> str:
    17  - formatted = ''.join(word.title() for word in snakecased_string.split('_'))
     17 + formatted = "".join(word.title() for word in snakecased_string.split("_"))
    18 18   result = formatted[0].lower() + formatted[1:]
    19 19   return result
    20 20   
    21 21   @staticmethod
    22 22   def snake_to_title(snakecased_string: str) -> str:
    23  - words = snakecased_string.split('_')
     23 + words = snakecased_string.split("_")
    24 24   words[0] = words[0].title()
    25  - return ' '.join(words)
     25 + return " ".join(words)
    26 26   
    27 27   
    28 28  def is_country_tag(tag: str) -> bool:
    29 29   """detect if tag represent a country"""
    30  - return bool(re.match("^([a-zA-Z]){2}$", tag)) or tag == 'global'
     30 + return bool(re.match("^([a-zA-Z]){2}$", tag)) or tag == "global"
    31 31   
    32 32   
    33 33  def enrich_link_str(link: str) -> str:
    34 34   link = link.strip()
    35  - if link.startswith('www.') or (link.startswith('http') and '//' in link):
     35 + if link.startswith("www.") or (link.startswith("http") and "//" in link):
    36 36   return f'<a class="auto-link" href="{link}">{link}</a>'
    37 37   return link
    38 38   
    39 39   
    40 40  class URLMatcher:
    41  - _HTTP_URL_RE_STR = '^https?://(www.)?(.+)$'
     41 + _HTTP_URL_RE_STR = "^https?://(www.)?(.+)$"
    42 42   HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
    43  - UNSAFE_SYMBOLS = '.?'
     43 + UNSAFE_SYMBOLS = ".?"
    44 44   
    45 45   @classmethod
    46 46   def extract_main_part(self, url: str) -> str:
    47 47   match = self.HTTP_URL_RE.search(url)
    48 48   if match and match.group(2):
    49  - return match.group(2).rstrip('/')
     49 + return match.group(2).rstrip("/")
    50 50   
    51  - return ''
     51 + return ""
    52 52   
    53 53   @classmethod
    54  - def make_profile_url_regexp(self, url: str, username_regexp: str = ''):
     54 + def make_profile_url_regexp(self, url: str, username_regexp: str = ""):
    55 55   url_main_part = self.extract_main_part(url)
    56 56   for c in self.UNSAFE_SYMBOLS:
    57  - url_main_part = url_main_part.replace(c, f'\\{c}')
    58  - username_regexp = username_regexp or '.+?'
     57 + url_main_part = url_main_part.replace(c, f"\\{c}")
     58 + username_regexp = username_regexp or ".+?"
    59 59   
    60  - url_regexp = url_main_part.replace('{username}', f'({username_regexp})')
    61  - regexp_str = self._HTTP_URL_RE_STR.replace('(.+)', url_regexp)
     60 + url_regexp = url_main_part.replace("{username}", f"({username_regexp})")
     61 + regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
    62 62   
    63 63   return re.compile(regexp_str)
    64 64   
    65 65   
    66  -def get_dict_ascii_tree(items, prepend='', new_line=True):
    67  - text = ''
     66 +def get_dict_ascii_tree(items, prepend="", new_line=True):
     67 + text = ""
    68 68   for num, item in enumerate(items):
    69  - box_symbol = '┣╸' if num != len(items) - 1 else '┗╸'
     69 + box_symbol = "┣╸" if num != len(items) - 1 else "┗╸"
    70 70   
    71 71   if type(item) == tuple:
    72 72   field_name, field_value = item
    73  - if field_value.startswith('[\''):
     73 + if field_value.startswith("['"):
    74 74   is_last_item = num == len(items) - 1
    75  - prepend_symbols = ' ' * 3 if is_last_item else ''
     75 + prepend_symbols = " " * 3 if is_last_item else ""
    76 76   field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
    77  - text += f'\n{prepend}{box_symbol}{field_name}: {field_value}'
     77 + text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
    78 78   else:
    79  - text += f'\n{prepend}{box_symbol} {item}'
     79 + text += f"\n{prepend}{box_symbol} {item}"
    80 80   
    81 81   if not new_line:
    82 82   text = text[1:]
    skipped 7 lines
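Concrete calls make the helpers above easier to read (values are illustrative only):

    from maigret.utils import CaseConverter, URLMatcher

    CaseConverter.camel_to_snake("usernameClaimed")   # -> "username_claimed"
    CaseConverter.snake_to_camel("username_claimed")  # -> "usernameClaimed"
    CaseConverter.snake_to_title("check_type")        # -> "Check type"

    # Turn a profile URL template into a regexp that captures the username.
    pattern = URLMatcher.make_profile_url_regexp("https://example.com/u/{username}")
    match = pattern.match("https://www.example.com/u/soxoj")
    print(match.group(2))  # soxoj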
  • ■ ■ ■ ■ ■
    maigret.py
    skipped 15 lines
    16 16   
    17 17  if __name__ == "__main__":
    18 18   run()
     19 + 
  • ■ ■ ■ ■ ■ ■
    setup.cfg
    1 1  [egg_info]
    2 2  tag_build =
    3 3  tag_date = 0
     4 + 
     5 +[flake8]
     6 +per-file-ignores = __init__.py:F401
     7 + 
     8 +[mypy]
     9 +ignore_missing_imports = True
  • ■ ■ ■ ■ ■ ■
    test.sh
     1 +#!/bin/sh
     2 +pytest tests
     3 + 
  • ■ ■ ■ ■ ■ ■
    wizard.py
    skipped 25 lines
    26 26   # user input
    27 27   username = input('Enter username to search: ')
    28 28   
    29  - sites_count_raw = input(f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): ')
     29 + sites_count_raw = input(
     30 + f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): '
     31 + )
    30 32   sites_count = int(sites_count_raw) or TOP_SITES_COUNT
    31 33   
    32 34   sites = db.ranked_sites_dict(top=sites_count)
    skipped 1 lines
    34 36   show_progressbar_raw = input('Do you want to show a progressbar? [Yn] ')
    35 37   show_progressbar = show_progressbar_raw.lower() != 'n'
    36 38   
    37  - extract_info_raw = input('Do you want to extract additional info from accounts\' pages? [Yn] ')
     39 + extract_info_raw = input(
     40 + 'Do you want to extract additional info from accounts\' pages? [Yn] '
     41 + )
    38 42   extract_info = extract_info_raw.lower() != 'n'
    39 43   
    40  - use_notifier_raw = input('Do you want to use notifier for displaying results while searching? [Yn] ')
     44 + use_notifier_raw = input(
     45 + 'Do you want to use notifier for displaying results while searching? [Yn] '
     46 + )
    41 47   use_notifier = use_notifier_raw.lower() != 'n'
    42 48   
    43 49   notifier = None
    skipped 1 lines
    45 51   notifier = maigret.Notifier(print_found_only=True, skip_check_errors=True)
    46 52   
    47 53   # search!
    48  - search_func = maigret.search(username=username,
    49  - site_dict=sites,
    50  - timeout=TIMEOUT,
    51  - logger=logger,
    52  - max_connections=MAX_CONNECTIONS,
    53  - query_notify=notifier,
    54  - no_progressbar=(not show_progressbar),
    55  - is_parsing_enabled=extract_info,
    56  - )
     54 + search_func = maigret.search(
     55 + username=username,
     56 + site_dict=sites,
     57 + timeout=TIMEOUT,
     58 + logger=logger,
     59 + max_connections=MAX_CONNECTIONS,
     60 + query_notify=notifier,
     61 + no_progressbar=(not show_progressbar),
     62 + is_parsing_enabled=extract_info,
     63 + )
    57 64   
    58 65   results = loop.run_until_complete(search_func)
    59 66   
    skipped 6 lines
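If you want to post-process the results dict yourself, here is a minimal sketch continuing the wizard snippet above (assumption: each entry carries a 'status' QueryResult, as used in submit.py, and a 'url_user' link for found accounts):

    from maigret.result import QueryStatus

    # Print only the accounts detected as existing.
    for site_name, data in results.items():
        if data["status"].status == QueryStatus.CLAIMED:
            print(f"{site_name}: {data.get('url_user', '')}")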