STRLCPY/maigret

Merge pull request #124 from soxoj/refactoring-complexity-decrease
```
Refactored to decrease cyclomatic complexity
```
soxoj committed with GitHub 3 years ago

9858e713

2 parents
ad5c7fbc
c88e194d

Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing has completed)

■ ■ ■ ■ ■ ■

maigret/checking.py

		skipped 53 lines
54	54		decoded_content = response_content.decode(charset, "ignore")
55	55		html_text = decoded_content
56	56
	57	+	error = None
57	58		if status_code == 0:
58	59		error = CheckError("Connection lost")
59		-	else:
60		-	error = None
61	60
62	61		logger.debug(html_text)
63	62
		skipped 9 lines
73	72		error = CheckError("Interrupted")
74	73		except Exception as e:
75	74		# python-specific exceptions
76		-	if sys.version_info.minor > 6:
77		-	if isinstance(e, ssl.SSLCertVerificationError) or isinstance(
78		-	e, ssl.SSLError
79		-	):
80		-	error = CheckError("SSL", str(e))
	75	+	if sys.version_info.minor > 6 and (
	76	+	isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError)
	77	+	):
	78	+	error = CheckError("SSL", str(e))
81	79		else:
82	80		logger.debug(e, exc_info=True)
83	81		error = CheckError("Unexpected", str(e))
		skipped 25 lines
109	107		return None
110	108
111	109
	110	+	def debug_response_logging(url, html_text, status_code, check_error):
	111	+	with open("debug.log", "a") as f:
	112	+	status = status_code or "No response"
	113	+	f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
	114	+	if html_text:
	115	+	f.write(f"code: {status}\nresponse: {str(html_text)}\n")
	116	+
	117	+
112	118		def process_site_result(
113	119		response, query_notify, logger, results_info: QueryResultWrapper, site: MaigretSite
114	120		):
		skipped 27 lines
142	148		response_time = None
143	149
144	150		if logger.level == logging.DEBUG:
145		-	with open("debug.txt", "a") as f:
146		-	status = status_code or "No response"
147		-	f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
148		-	if html_text:
149		-	f.write(f"code: {status}\nresponse: {str(html_text)}\n")
	151	+	debug_response_logging(url, html_text, status_code, check_error)
150	152
151	153		# additional check for errors
152	154		if status_code and not check_error:
		skipped 1 lines
154	156		html_text, status_code, site.errors, site.ignore403
155	157		)
156	158
157		-	if site.activation and html_text:
158		-	is_need_activation = any(
159		-	[s for s in site.activation["marks"] if s in html_text]
160		-	)
161		-	if is_need_activation:
162		-	method = site.activation["method"]
163		-	try:
164		-	activate_fun = getattr(ParsingActivator(), method)
165		-	# TODO: async call
166		-	activate_fun(site, logger)
167		-	except AttributeError:
168		-	logger.warning(
169		-	f"Activation method {method} for site {site.name} not found!"
170		-	)
171		-	except Exception as e:
172		-	logger.warning(f"Failed activation {method} for site {site.name}: {str(e)}", exc_info=True)
173		-	# TODO: temporary check error
	159	+	# parsing activation
	160	+	is_need_activation = any(
	161	+	[s for s in site.activation.get("marks", []) if s in html_text]
	162	+	)
	163	+
	164	+	if site.activation and html_text and is_need_activation:
	165	+	method = site.activation["method"]
	166	+	try:
	167	+	activate_fun = getattr(ParsingActivator(), method)
	168	+	# TODO: async call
	169	+	activate_fun(site, logger)
	170	+	except AttributeError:
	171	+	logger.warning(
	172	+	f"Activation method {method} for site {site.name} not found!"
	173	+	)
	174	+	except Exception as e:
	175	+	logger.warning(
	176	+	f"Failed activation {method} for site {site.name}: {str(e)}",
	177	+	exc_info=True,
	178	+	)
	179	+	# TODO: temporary check error
174	180
175	181		site_name = site.pretty_name
176	182		# presense flags
177	183		# True by default
178	184		presense_flags = site.presense_strs
179	185		is_presense_detected = False
	186	+
180	187		if html_text:
181	188		if not presense_flags:
182	189		is_presense_detected = True
		skipped 79 lines
262	269		results_info["ids_usernames"] = new_usernames
263	270		results_info["ids_links"] = eval(extracted_ids_data.get("links", "[]"))
264	271		result.ids_data = extracted_ids_data
265		-
266		-	# Notify caller about results of query.
267		-	query_notify.update(result, site.similar_search)
268	272
269	273		# Save status of request
270	274		results_info["status"] = result
		skipped 142 lines
413	417		response, query_notify, logger, default_result, site
414	418		)
415	419
	420	+	query_notify.update(response_result['status'], site.similar_search)
	421	+
416	422		return site.name, response_result
417	423
418	424
		skipped 198 lines
617	623		"disabled": False,
618	624		}
619	625
620		-	try:
621		-	check_data = [
622		-	(site.username_claimed, QueryStatus.CLAIMED),
623		-	(site.username_unclaimed, QueryStatus.AVAILABLE),
624		-	]
625		-	except Exception as e:
626		-	logger.error(e)
627		-	logger.error(site.__dict__)
628		-	check_data = []
	626	+	check_data = [
	627	+	(site.username_claimed, QueryStatus.CLAIMED),
	628	+	(site.username_unclaimed, QueryStatus.AVAILABLE),
	629	+	]
629	630
630	631		logger.info(f"Checking {site.name}...")
631	632
		skipped 94 lines

■ ■ ■ ■ ■ ■

maigret/errors.py

		skipped 53 lines
54	54		'Censorship', 'MGTS'
55	55		),
56	56		'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
57		-	'Сайт заблокирован хостинг-провайдером': CheckError('Site-specific', 'Site is disabled (Beget)'),
	57	+	'Сайт заблокирован хостинг-провайдером': CheckError(
	58	+	'Site-specific', 'Site is disabled (Beget)'
	59	+	),
58	60		}
59	61
60	62		ERRORS_TYPES = {
		skipped 68 lines

■ ■ ■ ■ ■ ■

maigret/notify.py

		skipped 151 lines
152	152
153	153		return
154	154
	155	+	def make_colored_terminal_notify(
	156	+	self, status, text, status_color, text_color, appendix
	157	+	):
	158	+	text = [
	159	+	f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]"
	160	+	+ f"{text_color} {text}: {Style.RESET_ALL}"
	161	+	+ f"{appendix}"
	162	+	]
	163	+	return "".join(text)
	164	+
	165	+	def make_simple_terminal_notify(
	166	+	self, status, text, status_color, text_color, appendix
	167	+	):
	168	+	return f"[{status}] {text}: {appendix}"
	169	+
	170	+	def make_terminal_notify(self, *args):
	171	+	if self.color:
	172	+	return self.make_colored_terminal_notify(*args)
	173	+	else:
	174	+	return self.make_simple_terminal_notify(*args)
	175	+
155	176		def start(self, message, id_type):
156	177		"""Notify Start.
157	178
		skipped 46 lines
204	225		Return Value:
205	226		Nothing.
206	227		"""
	228	+	notify = None
207	229		self.result = result
208	230
209		-	if not self.result.ids_data:
210		-	ids_data_text = ""
211		-	else:
	231	+	ids_data_text = ""
	232	+	if self.result.ids_data:
212	233		ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ")
213	234
214		-	def make_colored_terminal_notify(
215		-	status, text, status_color, text_color, appendix
216		-	):
217		-	text = [
218		-	f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]"
219		-	+ f"{text_color} {text}: {Style.RESET_ALL}"
220		-	+ f"{appendix}"
221		-	]
222		-	return "".join(text)
223		-
224		-	def make_simple_terminal_notify(status, text, appendix):
225		-	return f"[{status}] {text}: {appendix}"
226		-
227		-	def make_terminal_notify(is_colored=True, *args):
228		-	if is_colored:
229		-	return make_colored_terminal_notify(*args)
230		-	else:
231		-	return make_simple_terminal_notify(*args)
232		-
233		-	notify = None
234		-
235	235		# Output to the terminal is desired.
236	236		if result.status == QueryStatus.CLAIMED:
237	237		color = Fore.BLUE if is_similar else Fore.GREEN
238	238		status = "?" if is_similar else "+"
239		-	notify = make_terminal_notify(
240		-	self.color,
	239	+	notify = self.make_terminal_notify(
241	240		status,
242	241		result.site_name,
243	242		color,
		skipped 2 lines
246	245		)
247	246		elif result.status == QueryStatus.AVAILABLE:
248	247		if not self.print_found_only:
249		-	notify = make_terminal_notify(
250		-	self.color,
	248	+	notify = self.make_terminal_notify(
251	249		"-",
252	250		result.site_name,
253	251		Fore.RED,
		skipped 2 lines
256	254		)
257	255		elif result.status == QueryStatus.UNKNOWN:
258	256		if not self.skip_check_errors:
259		-	notify = make_terminal_notify(
260		-	self.color,
	257	+	notify = self.make_terminal_notify(
261	258		"?",
262	259		result.site_name,
263	260		Fore.RED,
		skipped 3 lines
267	264		elif result.status == QueryStatus.ILLEGAL:
268	265		if not self.print_found_only:
269	266		text = "Illegal Username Format For This Site!"
270		-	notify = make_terminal_notify(
271		-	self.color,
	267	+	notify = self.make_terminal_notify(
272	268		"-",
273	269		result.site_name,
274	270		Fore.RED,
		skipped 10 lines
285	281		if notify:
286	282		sys.stdout.write("\x1b[1K\r")
287	283		print(notify)
288		-
289		-	return
290	284
291	285		def __str__(self):
292	286		"""Convert Object To String.
		skipped 11 lines

■ ■ ■ ■ ■ ■

maigret/report.py

		skipped 292 lines
293	293		os.remove(filename)
294	294		workbook = xmind.load(filename)
295	295		sheet = workbook.getPrimarySheet()
296		-	design_sheet(sheet, username, results)
	296	+	design_xmind_sheet(sheet, username, results)
297	297		xmind.save(workbook, path=filename)
298	298
299	299
300		-	def design_sheet(sheet, username, results):
	300	+	def add_xmind_subtopic(userlink, k, v, supposed_data):
	301	+	currentsublabel = userlink.addSubTopic()
	302	+	field = "fullname" if k == "name" else k
	303	+	if field not in supposed_data:
	304	+	supposed_data[field] = []
	305	+	supposed_data[field].append(v)
	306	+	currentsublabel.setTitle("%s: %s" % (k, v))
	307	+
	308	+
	309	+	def design_xmind_sheet(sheet, username, results):
301	310		alltags = {}
302	311		supposed_data = {}
303	312
		skipped 7 lines
311	320
312	321		for website_name in results:
313	322		dictionary = results[website_name]
	323	+	result_status = dictionary.get("status")
	324	+	if result_status.status != QueryStatus.CLAIMED:
	325	+	continue
314	326
315		-	if dictionary.get("status").status == QueryStatus.CLAIMED:
316		-	# firsttime I found that entry
317		-	for tag in dictionary.get("status").tags:
318		-	if tag.strip() == "":
319		-	continue
320		-	if tag not in alltags.keys():
321		-	if not is_country_tag(tag):
322		-	tagsection = root_topic1.addSubTopic()
323		-	tagsection.setTitle(tag)
324		-	alltags[tag] = tagsection
	327	+	stripped_tags = list(map(lambda x: x.strip(), result_status.tags))
	328	+	normalized_tags = list(
	329	+	filter(lambda x: x and not is_country_tag(x), stripped_tags)
	330	+	)
325	331
326		-	category = None
327		-	for tag in dictionary.get("status").tags:
328		-	if tag.strip() == "":
329		-	continue
330		-	if not is_country_tag(tag):
331		-	category = tag
	332	+	category = None
	333	+	for tag in normalized_tags:
	334	+	if tag in alltags.keys():
	335	+	continue
	336	+	tagsection = root_topic1.addSubTopic()
	337	+	tagsection.setTitle(tag)
	338	+	alltags[tag] = tagsection
	339	+	category = tag
332	340
333		-	if category is None:
334		-	userlink = undefinedsection.addSubTopic()
335		-	userlink.addLabel(dictionary.get("status").site_url_user)
	341	+	section = alltags[category] if category else undefinedsection
	342	+	userlink = section.addSubTopic()
	343	+	userlink.addLabel(result_status.site_url_user)
	344	+
	345	+	ids_data = result_status.ids_data or {}
	346	+	for k, v in ids_data.items():
	347	+	# suppose target data
	348	+	if isinstance(v, list):
	349	+	for currentval in v:
	350	+	add_xmind_subtopic(userlink, k, currentval, supposed_data)
336	351		else:
337		-	userlink = alltags[category].addSubTopic()
338		-	userlink.addLabel(dictionary.get("status").site_url_user)
	352	+	add_xmind_subtopic(userlink, k, v, supposed_data)
339	353
340		-	if dictionary.get("status").ids_data:
341		-	for k, v in dictionary.get("status").ids_data.items():
342		-	# suppose target data
343		-	if not isinstance(v, list):
344		-	currentsublabel = userlink.addSubTopic()
345		-	field = "fullname" if k == "name" else k
346		-	if field not in supposed_data:
347		-	supposed_data[field] = []
348		-	supposed_data[field].append(v)
349		-	currentsublabel.setTitle("%s: %s" % (k, v))
350		-	else:
351		-	for currentval in v:
352		-	currentsublabel = userlink.addSubTopic()
353		-	field = "fullname" if k == "name" else k
354		-	if field not in supposed_data:
355		-	supposed_data[field] = []
356		-	supposed_data[field].append(currentval)
357		-	currentsublabel.setTitle("%s: %s" % (k, currentval))
358	354		# add supposed data
359		-	filterede_supposed_data = filter_supposed_data(supposed_data)
360		-	if len(filterede_supposed_data) > 0:
	355	+	filtered_supposed_data = filter_supposed_data(supposed_data)
	356	+	if len(filtered_supposed_data) > 0:
361	357		undefinedsection = root_topic1.addSubTopic()
362	358		undefinedsection.setTitle("SUPPOSED DATA")
363		-	for k, v in filterede_supposed_data.items():
	359	+	for k, v in filtered_supposed_data.items():
364	360		currentsublabel = undefinedsection.addSubTopic()
365	361		currentsublabel.setTitle("%s: %s" % (k, v))
366	362
		skipped 9 lines

■ ■ ■ ■ ■ ■

maigret/sites.py

		skipped 166 lines
167	167
168	168		return result
169	169
	170	+	def get_url_type(self) -> str:
	171	+	url = URLMatcher.extract_main_part(self.url)
	172	+	if url.startswith("{username}"):
	173	+	url = "SUBDOMAIN"
	174	+	elif url == "":
	175	+	url = f"{self.url} ({self.engine})"
	176	+	else:
	177	+	parts = url.split("/")
	178	+	url = "/" + "/".join(parts[1:])
	179	+	return url
	180	+
170	181		def update(self, updates: "dict") -> "MaigretSite":
171	182		self.__dict__.update(updates)
172	183		self.update_detectors()
		skipped 232 lines
405	416		if not sites_dict:
406	417		sites_dict = self.sites_dict()
407	418
	419	+	urls = {}
	420	+	tags = {}
408	421		output = ""
409	422		disabled_count = 0
410	423		total_count = len(sites_dict)
411		-	urls = {}
412		-	tags = {}
413	424
414	425		for _, site in sites_dict.items():
415	426		if site.disabled:
416	427		disabled_count += 1
417	428
418		-	url = URLMatcher.extract_main_part(site.url)
419		-	if url.startswith("{username}"):
420		-	url = "SUBDOMAIN"
421		-	elif url == "":
422		-	url = f"{site.url} ({site.engine})"
423		-	else:
424		-	parts = url.split("/")
425		-	url = "/" + "/".join(parts[1:])
426		-
427		-	urls[url] = urls.get(url, 0) + 1
	429	+	url_type = site.get_url_type()
	430	+	urls[url_type] = urls.get(url_type, 0) + 1
428	431
429	432		if not site.tags:
430	433		tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
431	434
432		-	for tag in site.tags:
433		-	if is_country_tag(tag):
434		-	# currenty do not display country tags
435		-	continue
	435	+	for tag in filter(lambda x: not is_country_tag(x), site.tags):
436	436		tags[tag] = tags.get(tag, 0) + 1
437	437
438	438		output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
		skipped 2 lines
441	441		if count == 1:
442	442		break
443	443		output += f"{count}\t{url}\n"
	444	+
444	445		output += "Top sites' tags:\n"
445		-	for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True):
	446	+	for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:20]:
446	447		mark = ""
447	448		if tag not in SUPPORTED_TAGS:
448	449		mark = " (non-standard)"
		skipped 4 lines

Merge pull request #124 from soxoj/refactoring-complexity-decrease