Projects STRLCPY maigret Commits 43f189f7
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • maigret/resources/data.json
    Diff is too large to be displayed.
  • ■ ■ ■ ■
    maigret/sites.py
    skipped 60 lines
    61 61   "military",
    62 62   "auto",
    63 63   "gambling",
     64 + "business",
     65 + "cybercriminal",
     66 + "review",
    64 67  ]
    65 68   
    66 69   
    skipped 405 lines
    472 475   output += f"{count}\t{url}\n"
    473 476   
    474 477   output += "Top tags:\n"
    475  - for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:20]:
     478 + for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
    476 479   mark = ""
    477 480   if tag not in SUPPORTED_TAGS:
    478 481   mark = " (non-standard)"
    skipped 4 lines
  • ■ ■ ■ ■ ■ ■
    maigret/submit.py
    skipped 1 lines
    2 2  import difflib
    3 3  import re
    4 4  from typing import List
    5  - 
     5 +import xml.etree.ElementTree as ET
    6 6  import requests
    7 7   
    8 8  from .activation import import_aiohttp_cookies
    skipped 35 lines
    44 44   ),
    45 45   2,
    46 46   )
     47 + 
     48 + 
     49 +def get_alexa_rank(site_url_main):
     50 + url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
     51 + xml_data = requests.get(url).text
     52 + root = ET.fromstring(xml_data)
     53 + alexa_rank = 0
     54 + 
     55 + try:
     56 + alexa_rank = int(root.find('.//REACH').attrib['RANK'])
     57 + except Exception:
     58 + pass
     59 + 
     60 + return alexa_rank
    47 61   
    48 62   
    49 63  def extract_mainpage_url(url):
    skipped 302 lines
  • ■ ■ ■ ■ ■ ■
    utils/add_tags.py
     1 +#!/usr/bin/env python3
     2 +import random
     3 +from argparse import ArgumentParser, RawDescriptionHelpFormatter
     4 + 
     5 +from maigret.maigret import MaigretDatabase
     6 +from maigret.submit import get_alexa_rank
     7 + 
     8 + 
     9 +def update_tags(site):
     10 + tags = []
     11 + if not site.tags:
     12 + print(f'Site {site.name} doesn\'t have tags')
     13 + else:
     14 + tags = site.tags
     15 + print(f'Site {site.name} tags: ' + ', '.join(tags))
     16 + 
     17 + print(f'URL: {site.url_main}')
     18 + 
     19 + new_tags = set(input('Enter new tags: ').split(', '))
     20 + if "disabled" in new_tags:
     21 + new_tags.remove("disabled")
     22 + site.disabled = True
     23 + 
     24 + print(f'Old alexa rank: {site.alexa_rank}')
     25 + rank = get_alexa_rank(site.url_main)
     26 + if rank:
     27 + print(f'New alexa rank: {rank}')
     28 + site.alexa_rank = rank
     29 + 
     30 + site.tags = [x for x in list(new_tags) if x]
     31 + 
     32 + 
     33 +if __name__ == '__main__':
     34 + parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
     35 + )
     36 + parser.add_argument("--base","-b", metavar="BASE_FILE",
     37 + dest="base_file", default="maigret/resources/data.json",
     38 + help="JSON file with sites data to update.")
     39 + 
     40 + pool = list()
     41 + 
     42 + args = parser.parse_args()
     43 + 
     44 + db = MaigretDatabase()
     45 + db.load_from_file(args.base_file).sites
     46 + 
     47 + while True:
     48 + site = random.choice(db.sites)
     49 + if site.engine == 'uCoz' or site.tags:
     50 + continue
     51 + 
     52 + update_tags(site)
     53 + 
     54 + db.save_to_file(args.base_file)
Please wait...
Page is in error, reload to recover