STRLCPY/maigret

Merge branch 'main' into cookies-support
soxoj committed with GitHub 4 years ago

745dcda2

2 parents
cf8d0958
2a9558ca

Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing is complete)

■ ■ ■ ■ ■ ■

maigret/maigret.py

		skipped 578 lines
579	579		return changes
580	580
581	581
582		-	async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False) -> bool:
583		-	sem = asyncio.Semaphore(10)
	582	+	async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
	583	+	max_connections=10) -> bool:
	584	+	sem = asyncio.Semaphore(max_connections)
584	585		tasks = []
585	586		all_sites = site_data
586	587
		skipped 228 lines
815	816		# Database self-checking
816	817		if args.self_check:
817	818		print('Maigret sites database self-checking...')
818		-	is_need_update = await self_check(db, site_data, logger)
	819	+	is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
819	820		if is_need_update:
820	821		if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
821	822		db.save_to_file(args.json_file)
		skipped 128 lines

maigret/resources/data.json

Diff is too large to be displayed.

■ ■ ■ ■ ■ ■

maigret/sites.py

		skipped 117 lines
118	118		# remove list items
119	119		if isinstance(engine_data[k], list) and is_exists:
120	120		for f in engine_data[k]:
121		-	self_copy.__dict__[field].remove(f)
	121	+	if f in self_copy.__dict__[field]:
	122	+	self_copy.__dict__[field].remove(f)
122	123		continue
123	124		if is_exists:
124	125		del self_copy.__dict__[field]
		skipped 18 lines
143	144		normalized_names = list(map(str.lower, names))
144	145		normalized_tags = list(map(str.lower, tags))
145	146
146		-	is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
	147	+	def is_tags_ok(site):
	148	+	intersected_tags = set(site.tags).intersection(set(normalized_tags))
	149	+	is_disabled = 'disabled' in tags and site.disabled
	150	+	return intersected_tags or is_disabled
	151	+
147	152		is_name_ok = lambda x: x.name.lower() in normalized_names
148	153		is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
149	154
		skipped 130 lines

sites.md

Diff is too large to be displayed.

■ ■ ■ ■ ■ ■

utils/update_site_data.py

		skipped 23 lines
24	24		'50000000': '10M',
25	25		})
26	26
	27	+	SEMAPHORE = threading.Semaphore(10)
	28	+
27	29		def get_rank(domain_to_query, site, print_errors=True):
28		-	#Retrieve ranking data via alexa API
29		-	url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
30		-	xml_data = requests.get(url).text
31		-	root = ET.fromstring(xml_data)
	30	+	with SEMAPHORE:
	31	+	#Retrieve ranking data via alexa API
	32	+	url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
	33	+	xml_data = requests.get(url).text
	34	+	root = ET.fromstring(xml_data)
32	35
33		-	try:
34		-	#Get ranking for this site.
35		-	site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
36		-	country = root.find('.//COUNTRY')
37		-	if not country is None and country.attrib:
38		-	country_code = country.attrib['CODE']
39		-	tags = set(site.tags)
40		-	if country_code:
41		-	tags.add(country_code.lower())
42		-	site.tags = sorted(list(tags))
43		-	if site.type != 'username':
44		-	site.disabled = False
45		-	except Exception as e:
46		-	if print_errors:
47		-	logging.error(e)
48		-	# We did not find the rank for some reason.
49		-	print(f"Error retrieving rank information for '{domain_to_query}'")
50		-	print(f" Returned XML is |{xml_data}|")
	36	+	try:
	37	+	#Get ranking for this site.
	38	+	site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
	39	+	country = root.find('.//COUNTRY')
	40	+	if not country is None and country.attrib:
	41	+	country_code = country.attrib['CODE']
	42	+	tags = set(site.tags)
	43	+	if country_code:
	44	+	tags.add(country_code.lower())
	45	+	site.tags = sorted(list(tags))
	46	+	if site.type != 'username':
	47	+	site.disabled = False
	48	+	except Exception as e:
	49	+	if print_errors:
	50	+	logging.error(e)
	51	+	# We did not find the rank for some reason.
	52	+	print(f"Error retrieving rank information for '{domain_to_query}'")
	53	+	print(f" Returned XML is |{xml_data}|")
51	54
52		-	return
	55	+	return
53	56
54	57
55	58		def get_step_rank(rank):
		skipped 73 lines

Merge branch 'main' into cookies-support