■ ■ ■ ■ ■ ■
utils/update_site_data.py
| skipped 23 lines |
24 | 24 | | '50000000': '10M', |
25 | 25 | | }) |
26 | 26 | | |
| 27 | + | SEMAPHORE = threading.Semaphore(10) |
| 28 | + | |
27 | 29 | | def get_rank(domain_to_query, site, print_errors=True): |
28 | | - | #Retrieve ranking data via alexa API |
29 | | - | url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}" |
30 | | - | xml_data = requests.get(url).text |
31 | | - | root = ET.fromstring(xml_data) |
| 30 | + | with SEMAPHORE: |
| 31 | + | #Retrieve ranking data via alexa API |
| 32 | + | url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}" |
| 33 | + | xml_data = requests.get(url).text |
| 34 | + | root = ET.fromstring(xml_data) |
32 | 35 | | |
33 | | - | try: |
34 | | - | #Get ranking for this site. |
35 | | - | site.alexa_rank = int(root.find('.//REACH').attrib['RANK']) |
36 | | - | country = root.find('.//COUNTRY') |
37 | | - | if not country is None and country.attrib: |
38 | | - | country_code = country.attrib['CODE'] |
39 | | - | tags = set(site.tags) |
40 | | - | if country_code: |
41 | | - | tags.add(country_code.lower()) |
42 | | - | site.tags = sorted(list(tags)) |
43 | | - | if site.type != 'username': |
44 | | - | site.disabled = False |
45 | | - | except Exception as e: |
46 | | - | if print_errors: |
47 | | - | logging.error(e) |
48 | | - | # We did not find the rank for some reason. |
49 | | - | print(f"Error retrieving rank information for '{domain_to_query}'") |
50 | | - | print(f" Returned XML is |{xml_data}|") |
| 36 | + | try: |
| 37 | + | #Get ranking for this site. |
| 38 | + | site.alexa_rank = int(root.find('.//REACH').attrib['RANK']) |
| 39 | + | country = root.find('.//COUNTRY') |
| 40 | + | if not country is None and country.attrib: |
| 41 | + | country_code = country.attrib['CODE'] |
| 42 | + | tags = set(site.tags) |
| 43 | + | if country_code: |
| 44 | + | tags.add(country_code.lower()) |
| 45 | + | site.tags = sorted(list(tags)) |
| 46 | + | if site.type != 'username': |
| 47 | + | site.disabled = False |
| 48 | + | except Exception as e: |
| 49 | + | if print_errors: |
| 50 | + | logging.error(e) |
| 51 | + | # We did not find the rank for some reason. |
| 52 | + | print(f"Error retrieving rank information for '{domain_to_query}'") |
| 53 | + | print(f" Returned XML is |{xml_data}|") |
51 | 54 | | |
52 | | - | return |
| 55 | + | return |
53 | 56 | | |
54 | 57 | | |
55 | 58 | | def get_step_rank(rank): |
| skipped 73 lines |