STRLCPY/maigret

Added DB statistics autoupdate and write to sites.md (#357)
Soxoj committed with Soxoj 3 years ago

3ba0e46d

1 parent db53f1f2

Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing completes)

■ ■ ■ ■ ■ ■

maigret/maigret.py

		skipped 565 lines
566	566
567	567		# Database statistics
568	568		if args.stats:
569		-	print(db.get_db_stats(db.sites_dict))
	569	+	print(db.get_db_stats())
570	570
571	571		report_dir = path.join(os.getcwd(), args.folderoutput)
572	572
		skipped 159 lines

■ ■ ■ ■ ■ ■

maigret/sites.py

		skipped 418 lines
419	419		results[_id] = _type
420	420		return results
421	421
422		-	def get_db_stats(self, sites_dict):
423		-	if not sites_dict:
424		-	sites_dict = self.sites_dict()
	422	+	def get_db_stats(self, is_markdown=False):
	423	+	sites_dict = self.sites_dict
425	424
426	425		urls = {}
427	426		tags = {}
428	427		output = ""
429	428		disabled_count = 0
430	429		total_count = len(sites_dict)
	430	+
	431	+	message_checks = 0
	432	+	message_checks_one_factor = 0
431	433
432	434		for _, site in sites_dict.items():
433	435		if site.disabled:
		skipped 2 lines
436	438		url_type = site.get_url_template()
437	439		urls[url_type] = urls.get(url_type, 0) + 1
438	440
	441	+	if site.check_type == 'message':
	442	+	message_checks += 1
	443	+	if site.absence_strs and site.presense_strs:
	444	+	continue
	445	+	message_checks_one_factor += 1
	446	+
439	447		if not site.tags:
440	448		tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
441	449
442	450		for tag in filter(lambda x: not is_country_tag(x), site.tags):
443	451		tags[tag] = tags.get(tag, 0) + 1
444	452
445		-	output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
446		-	output += "Top profile URLs:\n"
447		-	for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
	453	+	output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n\n"
	454	+	output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} (false positive risks)\n\n"
	455	+
	456	+	top_urls_count = 20
	457	+	output += f"Top {top_urls_count} profile URLs:\n"
	458	+	for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:top_urls_count]:
448	459		if count == 1:
449	460		break
450		-	output += f"{count}\t{url}\n"
	461	+	output += f"- ({count})\t`{url}`\n" if is_markdown else f"{count}\t{url}\n"
451	462
452		-	output += "Top tags:\n"
453		-	for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
	463	+	top_tags_count = 20
	464	+	output += f"\nTop {top_tags_count} tags:\n"
	465	+	for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:top_tags_count]:
454	466		mark = ""
455	467		if tag not in self._tags:
456	468		mark = " (non-standard)"
457		-	output += f"{count}\t{tag}{mark}\n"
	469	+	output += f"- ({count})\t`{tag}`{mark}\n" if is_markdown else f"{count}\t{tag}{mark}\n"
458	470
459	471		return output
460	472

■ ■ ■ ■ ■ ■

utils/update_site_data.py

		skipped 139 lines
140	140		site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
141	141		db.save_to_file(args.base_file)
142	142
	143	+	statistics_text = db.get_db_stats(is_markdown=True)
	144	+	site_file.write('## Statistics\n\n')
	145	+	site_file.write(statistics_text)
	146	+
143	147		print("\nFinished updating supported site listing!")
144	148

Added DB statistics autoupdate and write to sites.md (#357)