Projects STRLCPY maigret Commits 745dcda2
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    maigret/maigret.py
    skipped 578 lines
    579 579   return changes
    580 580   
    581 581   
    582  -async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False) -> bool:
    583  - sem = asyncio.Semaphore(10)
     582 +async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
     583 + max_connections=10) -> bool:
     584 + sem = asyncio.Semaphore(max_connections)
    584 585   tasks = []
    585 586   all_sites = site_data
    586 587   
    skipped 228 lines
    815 816   # Database self-checking
    816 817   if args.self_check:
    817 818   print('Maigret sites database self-checking...')
    818  - is_need_update = await self_check(db, site_data, logger)
     819 + is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
    819 820   if is_need_update:
    820 821   if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
    821 822   db.save_to_file(args.json_file)
    skipped 128 lines
  • maigret/resources/data.json
    Diff is too large to be displayed.
  • ■ ■ ■ ■ ■
    maigret/sites.py
    skipped 117 lines
    118 118   # remove list items
    119 119   if isinstance(engine_data[k], list) and is_exists:
    120 120   for f in engine_data[k]:
    121  - self_copy.__dict__[field].remove(f)
     121 + if f in self_copy.__dict__[field]:
     122 + self_copy.__dict__[field].remove(f)
    122 123   continue
    123 124   if is_exists:
    124 125   del self_copy.__dict__[field]
    skipped 18 lines
    143 144   normalized_names = list(map(str.lower, names))
    144 145   normalized_tags = list(map(str.lower, tags))
    145 146   
    146  - is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
     147 + def is_tags_ok(site):
     148 + intersected_tags = set(site.tags).intersection(set(normalized_tags))
     149 + is_disabled = 'disabled' in tags and site.disabled
     150 + return intersected_tags or is_disabled
     151 + 
    147 152   is_name_ok = lambda x: x.name.lower() in normalized_names
    148 153   is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
    149 154   
    skipped 130 lines
  • sites.md
    Diff is too large to be displayed.
  • ■ ■ ■ ■ ■ ■
    utils/update_site_data.py
    skipped 23 lines
    24 24   '50000000': '10M',
    25 25  })
    26 26   
     27 +SEMAPHORE = threading.Semaphore(10)
     28 + 
    27 29  def get_rank(domain_to_query, site, print_errors=True):
    28  - #Retrieve ranking data via alexa API
    29  - url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
    30  - xml_data = requests.get(url).text
    31  - root = ET.fromstring(xml_data)
     30 + with SEMAPHORE:
     31 + #Retrieve ranking data via alexa API
     32 + url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
     33 + xml_data = requests.get(url).text
     34 + root = ET.fromstring(xml_data)
    32 35   
    33  - try:
    34  - #Get ranking for this site.
    35  - site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
    36  - country = root.find('.//COUNTRY')
    37  - if not country is None and country.attrib:
    38  - country_code = country.attrib['CODE']
    39  - tags = set(site.tags)
    40  - if country_code:
    41  - tags.add(country_code.lower())
    42  - site.tags = sorted(list(tags))
    43  - if site.type != 'username':
    44  - site.disabled = False
    45  - except Exception as e:
    46  - if print_errors:
    47  - logging.error(e)
    48  - # We did not find the rank for some reason.
    49  - print(f"Error retrieving rank information for '{domain_to_query}'")
    50  - print(f" Returned XML is |{xml_data}|")
     36 + try:
     37 + #Get ranking for this site.
     38 + site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
     39 + country = root.find('.//COUNTRY')
     40 + if not country is None and country.attrib:
     41 + country_code = country.attrib['CODE']
     42 + tags = set(site.tags)
     43 + if country_code:
     44 + tags.add(country_code.lower())
     45 + site.tags = sorted(list(tags))
     46 + if site.type != 'username':
     47 + site.disabled = False
     48 + except Exception as e:
     49 + if print_errors:
     50 + logging.error(e)
     51 + # We did not find the rank for some reason.
     52 + print(f"Error retrieving rank information for '{domain_to_query}'")
     53 + print(f" Returned XML is |{xml_data}|")
    51 54   
    52  - return
     55 + return
    53 56   
    54 57   
    55 58  def get_step_rank(rank):
    skipped 73 lines
Please wait...
Page is in error, reload to recover