STRLCPY/maigret

Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing completes)

■ ■ ■ ■ ■ ■

maigret/sites.py

		skipped 60 lines
61	61		"military",
62	62		"auto",
63	63		"gambling",
	64	+	"business",
	65	+	"cybercriminal",
	66	+	"review",
64	67		]
65	68
66	69
		skipped 405 lines
472	475		output += f"{count}\t{url}\n"
473	476
474	477		output += "Top tags:\n"
475		-	for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:20]:
	478	+	for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
476	479		mark = ""
477	480		if tag not in SUPPORTED_TAGS:
478	481		mark = " (non-standard)"
		skipped 4 lines

■ ■ ■ ■ ■ ■ ■

maigret/submit.py

		skipped 1 lines
2	2		import difflib
3	3		import re
4	4		from typing import List
5		-
	5	+	import xml.etree.ElementTree as ET
6	6		import requests
7	7
8	8		from .activation import import_aiohttp_cookies
		skipped 35 lines
44	44		),
45	45		2,
46	46		)
	47	+
	48	+
	49	+	def get_alexa_rank(site_url_main):
	50	+	url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
	51	+	xml_data = requests.get(url).text
	52	+	root = ET.fromstring(xml_data)
	53	+	alexa_rank = 0
	54	+
	55	+	try:
	56	+	alexa_rank = int(root.find('.//REACH').attrib['RANK'])
	57	+	except Exception:
	58	+	pass
	59	+
	60	+	return alexa_rank
47	61
48	62
49	63		def extract_mainpage_url(url):
		skipped 302 lines

■ ■ ■ ■ ■ ■

utils/add_tags.py

1	+	#!/usr/bin/env python3
2	+	import random
3	+	from argparse import ArgumentParser, RawDescriptionHelpFormatter
4	+
5	+	from maigret.maigret import MaigretDatabase
6	+	from maigret.submit import get_alexa_rank
7	+
8	+
def _parse_tags(raw):
    """Split comma-separated text into unique, non-empty tags in input order."""
    stripped = (part.strip() for part in raw.split(','))
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # resulting tag list is deterministic.
    return list(dict.fromkeys(tag for tag in stripped if tag))


def update_tags(site):
    """Interactively replace a site's tags and refresh its Alexa rank.

    Prints the site's current tags and main URL, then reads a
    comma-separated list of new tags from standard input. The pseudo-tag
    ``disabled`` is not stored as a tag; it sets ``site.disabled`` to
    ``True`` instead. The Alexa rank is looked up again and stored only
    when a non-zero rank is returned.

    Args:
        site: Site object to modify in place. Reads ``name``, ``tags``,
            ``url_main`` and ``alexa_rank``; may write ``tags``,
            ``disabled`` and ``alexa_rank``.
    """
    if site.tags:
        print(f'Site {site.name} tags: ' + ', '.join(site.tags))
    else:
        print(f"Site {site.name} doesn't have tags")

    print(f'URL: {site.url_main}')

    new_tags = _parse_tags(input('Enter new tags: '))
    if "disabled" in new_tags:
        new_tags.remove("disabled")
        site.disabled = True

    print(f'Old alexa rank: {site.alexa_rank}')
    rank = get_alexa_rank(site.url_main)
    if rank:
        print(f'New alexa rank: {rank}')
        site.alexa_rank = rank

    site.tags = new_tags
31	+
32	+
if __name__ == '__main__':
    # Interactive helper: repeatedly pick a random untagged site, ask the
    # user for tags, and write the database back to disk.
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument(
        "--base", "-b",
        metavar="BASE_FILE",
        dest="base_file",
        default="maigret/resources/data.json",
        help="JSON file with sites data to update.",
    )
    args = parser.parse_args()

    db = MaigretDatabase()
    db.load_from_file(args.base_file)

    while True:
        # Re-evaluate on every pass: sites tagged in this session drop out,
        # and an empty pool ends the loop instead of spinning forever.
        candidates = [
            site for site in db.sites
            if site.engine != 'uCoz' and not site.tags
        ]
        if not candidates:
            print('No untagged sites left')
            break

        site = random.choice(candidates)
        try:
            update_tags(site)
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt is the way to end the session.
            print()
            break

        # Save after every site so an interrupted session keeps earlier answers.
        db.save_to_file(args.base_file)