maigret — commit ecabf88c
  • maigret/maigret.py
    skipped 535 lines
    536 536   site_data = get_top_sites_for_id(args.id_type)
    537 537   
    538 538   if args.new_site_to_submit:
    539  - submitter = Submitter(db=db, logger=logger, settings=settings)
     539 + submitter = Submitter(db=db, logger=logger, settings=settings, args=args)
    540 540   is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
    541 541   if is_submitted:
    542 542   db.save_to_file(db_file)
    skipped 189 lines
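
    Note on the call-site change above: Submitter now receives the parsed CLI namespace, so it can read the proxy and cookie settings itself instead of having them threaded through individually. A minimal, hypothetical sketch of the relevant argparse wiring (the flag names shown are assumptions drawn from this diff, not the full maigret CLI):

    import argparse

    # Hypothetical, stripped-down parser: the real maigret CLI defines many more flags.
    parser = argparse.ArgumentParser()
    parser.add_argument("--proxy", default=None, help="SOCKS/HTTP proxy URL")
    parser.add_argument("--cookie-file", dest="cookie_file", default=None)
    parser.add_argument("--new-site-to-submit", dest="new_site_to_submit", default=None)

    args = parser.parse_args(["--proxy", "socks5://127.0.0.1:9050"])
    # Submitter(db=db, logger=logger, settings=settings, args=args) can now pick up
    # args.proxy and args.cookie_file when building its HTTP session (see submit.py below).
    print(args.proxy, args.cookie_file)  # socks5://127.0.0.1:9050 None
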
  • maigret/resources/data.json
    skipped 1832 lines
    1833 1833   "usernameUnclaimed": "noonewouldeverusethis7"
    1834 1834   },
    1835 1835   "Bestfantasybooks": {
     1836 + "disabled": true,
    1836 1837   "tags": [
    1837 1838   "us"
    1838 1839   ],
    skipped 2593 lines
    4432 4433   ]
    4433 4434   },
    4434 4435   "Facenama": {
     4436 + "disabled": true,
    4435 4437   "tags": [
    4436 4438   "ir"
    4437 4439   ],
    skipped 24002 lines
    28440 28442   "usernameUnclaimed": "noonewouldeverusethis7",
    28441 28443   "checkType": "message",
    28442 28444   "alexaRank": 6859
     28445 + },
     28446 + "Worldis.me": {
     28447 + "absenceStrs": [
     28448 + "user_password",
     28449 + "send_email"
     28450 + ],
     28451 + "presenseStrs": [
     28452 + "my_profile",
     28453 + "profile_upi",
     28454 + "UserInfo"
     28455 + ],
     28456 + "url": "http://en.worldis.me/{username}",
     28457 + "urlMain": "http://en.worldis.me",
     28458 + "usernameClaimed": "admin",
     28459 + "usernameUnclaimed": "noonewouldeverusethis7",
     28460 + "checkType": "message",
     28461 + "alexaRank": 3233509,
     28462 + "tags": [
     28463 + "ru"
     28464 + ]
     28465 + },
     28466 + "photoshop-kopona.com": {
     28467 + "absenceStrs": [
     28468 + "<title>noonewouldeverusethis7 &raquo; \u0420\u0435\u0441\u0443\u0440\u0441\u044b \u0434\u043b\u044f \u0424\u043e\u0442\u043e\u0448\u043e\u043f\u0430</title>"
     28469 + ],
     28470 + "presenseStrs": [
     28471 + "offline",
     28472 + "uspusertitle"
     28473 + ],
     28474 + "url": "https://photoshop-kopona.com/ru/user/{username}/",
     28475 + "urlMain": "https://photoshop-kopona.com",
     28476 + "usernameClaimed": "test",
     28477 + "usernameUnclaimed": "noonewouldeverusethis7",
     28478 + "checkType": "message",
     28479 + "alexaRank": 44106,
     28480 + "tags": [
     28481 + "ru"
     28482 + ]
     28483 + },
     28484 + "dumskaya.net": {
     28485 + "absenceStrs": [
     28486 + "><img class=nobo src=/banner/ps2_/ alt="
     28487 + ],
     28488 + "presenseStrs": [
     28489 + "><img class=nobo src=/banner/prague_/ alt="
     28490 + ],
     28491 + "url": "https://dumskaya.net/user/{username}/",
     28492 + "urlMain": "https://dumskaya.net",
     28493 + "usernameClaimed": "test",
     28494 + "usernameUnclaimed": "noonewouldeverusethis7",
     28495 + "checkType": "message",
     28496 + "alexaRank": 73617,
     28497 + "tags": [
     28498 + "ru"
     28499 + ]
     28500 + },
     28501 + "rblx.trade": {
     28502 + "absenceStrs": [
     28503 + "isRblxTradeException"
     28504 + ],
     28505 + "presenseStrs": [
     28506 + "userId"
     28507 + ],
     28508 + "url": "https://rblx.trade/p/{username}",
     28509 + "urlMain": "https://rblx.trade",
     28510 + "usernameClaimed": "test",
     28511 + "usernameUnclaimed": "noonewouldeverusethis7",
     28512 + "checkType": "message",
     28513 + "alexaRank": 362185,
     28514 + "tags": [
     28515 + "gaming"
     28516 + ]
     28517 + },
     28518 + "monitoringminecraft.ru": {
     28519 + "absenceStrs": [
     28520 + "shadowi"
     28521 + ],
     28522 + "presenseStrs": [
     28523 + "small"
     28524 + ],
     28525 + "url": "https://monitoringminecraft.ru/player/{username}",
     28526 + "urlMain": "https://monitoringminecraft.ru",
     28527 + "usernameClaimed": "test",
     28528 + "usernameUnclaimed": "noonewouldeverusethis7",
     28529 + "checkType": "message",
     28530 + "alexaRank": 115209,
     28531 + "tags": [
     28532 + "gaming"
     28533 + ]
     28534 + },
     28535 + "profi.ru": {
     28536 + "absenceStrs": [
     28537 + "page-404__paragraph"
     28538 + ],
     28539 + "presenseStrs": [
     28540 + "PROFILE",
     28541 + "profiles",
     28542 + "profileOIO",
     28543 + "fullProfile",
     28544 + "profileUGC2"
     28545 + ],
     28546 + "url": "https://profi.ru/profile/{username}/",
     28547 + "urlMain": "https://profi.ru",
     28548 + "usernameClaimed": "EgorovRV",
     28549 + "usernameUnclaimed": "noonewouldeverusethis7",
     28550 + "checkType": "message",
     28551 + "alexaRank": 12037,
     28552 + "tags": [
     28553 + "freelance"
     28554 + ]
     28555 + },
     28556 + "app.airnfts.com": {
     28557 + "absenceStrs": [
     28558 + "user-not-found-div"
     28559 + ],
     28560 + "presenseStrs": [
     28561 + "username",
     28562 + "ownerUsername",
     28563 + "creatorUsername",
     28564 + "name",
     28565 + "user"
     28566 + ],
     28567 + "url": "https://app.airnfts.com/creators/{username}",
     28568 + "urlMain": "https://app.airnfts.com",
     28569 + "usernameClaimed": "test",
     28570 + "usernameUnclaimed": "noonewouldeverusethis7",
     28571 + "checkType": "message",
     28572 + "alexaRank": 30223
     28573 + },
     28574 + "xgm.guru": {
     28575 + "absenceStrs": [
     28576 + ">Username:</label>"
     28577 + ],
     28578 + "presenseStrs": [
     28579 + "email",
     28580 + "usernamereg",
     28581 + "username-top",
     28582 + "\u041e\u043f\u044b\u0442 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044f",
     28583 + "check-username"
     28584 + ],
     28585 + "url": "https://xgm.guru/user/{username}",
     28586 + "urlMain": "https://xgm.guru",
     28587 + "usernameClaimed": "test",
     28588 + "usernameUnclaimed": "noonewouldeverusethis7",
     28589 + "checkType": "message",
     28590 + "alexaRank": 692341,
     28591 + "tags": [
     28592 + "forum",
     28593 + "gaming"
     28594 + ]
    28443 28595   }
    28444 28596   },
    28445 28597   "engines": {
    skipped 263 lines
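
    The new data.json entries above all use checkType "message": the checker fetches the profile URL and decides from substring markers in the response body. A simplified sketch of that decision rule, using the Worldis.me strings from this diff (illustration only; the real maigret checker is more involved):

    from typing import Iterable

    def looks_claimed(body: str, presense_strs: Iterable[str], absence_strs: Iterable[str]) -> bool:
        # Treat the username as claimed when every presence marker appears in the
        # page and no absence marker does. (Simplified compared to maigret itself.)
        return all(s in body for s in presense_strs) and not any(s in body for s in absence_strs)

    entry = {
        "absenceStrs": ["user_password", "send_email"],
        "presenseStrs": ["my_profile", "profile_upi", "UserInfo"],
        "url": "http://en.worldis.me/{username}",
    }
    page = "<html>... my_profile ... profile_upi ... UserInfo ...</html>"
    print(looks_claimed(page, entry["presenseStrs"], entry["absenceStrs"]))  # True
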
  • maigret/submit.py
    skipped 2 lines
    3 3  import re
    4 4  from typing import List
    5 5  import xml.etree.ElementTree as ET
     6 +from aiohttp import TCPConnector, ClientSession
    6 7  import requests
    7 8   
    8 9  from .activation import import_aiohttp_cookies
    skipped 15 lines
    24 25   TOP_FEATURES = 5
    25 26   URL_RE = re.compile(r"https?://(www\.)?")
    26 27   
    27  - def __init__(self, db: MaigretDatabase, settings: Settings, logger):
     28 + def __init__(self, db: MaigretDatabase, settings: Settings, logger, args):
    28 29   self.settings = settings
     30 + self.args = args
    29 31   self.db = db
    30 32   self.logger = logger
    31 33   
     34 + from aiohttp_socks import ProxyConnector
     35 + proxy = self.args.proxy
     36 + cookie_jar = None
     37 + if args.cookie_file:
     38 + cookie_jar = import_aiohttp_cookies(args.cookie_file)
     39 + 
     40 + connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
     41 + connector.verify_ssl = False
     42 + self.session = ClientSession(
     43 + connector=connector, trust_env=True, cookie_jar=cookie_jar
     44 + )
     45 + 
    32 46   @staticmethod
    33 47   def get_alexa_rank(site_url_main):
    34 48   url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
    skipped 28 lines
    63 77   results_dict = await maigret(
    64 78   username=username,
    65 79   site_dict={site.name: site},
     80 + proxy=self.args.proxy,
    66 81   logger=self.logger,
    67 82   timeout=30,
    68 83   id_type=site.type,
    skipped 57 lines
    126 141   return fields
    127 142   
    128 143   async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
     144 + resp_text = ''
    129 145   try:
    130  - r = requests.get(url_mainpage)
    131  - self.logger.debug(r.text)
     146 + r = await self.session.get(url_mainpage)
     147 + resp_text = await r.text()
     148 + self.logger.debug(resp_text)
    132 149   except Exception as e:
    133 150   self.logger.warning(e)
    134 151   print("Some error while checking main page")
    skipped 1 lines
    136 153   
    137 154   for engine in self.db.engines:
    138 155   strs_to_check = engine.__dict__.get("presenseStrs")
    139  - if strs_to_check and r and r.text:
     156 + if strs_to_check and resp_text:
    140 157   all_strs_in_response = True
    141 158   for s in strs_to_check:
    142  - if s not in r.text:
     159 + if s not in resp_text:
    143 160   all_strs_in_response = False
    144 161   sites = []
    145 162   if all_strs_in_response:
    skipped 63 lines
    209 226   headers = dict(self.HEADERS)
    210 227   headers.update(custom_headers)
    211 228   
    212  - # cookies
    213  - cookie_dict = None
    214  - if cookie_file:
    215  - self.logger.info(f'Use {cookie_file} for cookies')
    216  - cookie_jar = import_aiohttp_cookies(cookie_file)
    217  - cookie_dict = {c.key: c.value for c in cookie_jar}
    218  - 
    219  - exists_resp = requests.get(
    220  - url_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
     229 + exists_resp = await self.session.get(
     230 + url_exists,
     231 + headers=headers,
     232 + allow_redirects=redirects,
    221 233   )
     234 + exists_resp_text = await exists_resp.text()
    222 235   self.logger.debug(url_exists)
    223  - self.logger.debug(exists_resp.status_code)
    224  - self.logger.debug(exists_resp.text)
     236 + self.logger.debug(exists_resp.status)
     237 + self.logger.debug(exists_resp_text)
    225 238   
    226  - non_exists_resp = requests.get(
     239 + non_exists_resp = await self.session.get(
    227 240   url_not_exists,
    228  - cookies=cookie_dict,
    229 241   headers=headers,
    230 242   allow_redirects=redirects,
    231 243   )
     244 + non_exists_resp_text = await non_exists_resp.text()
    232 245   self.logger.debug(url_not_exists)
    233  - self.logger.debug(non_exists_resp.status_code)
    234  - self.logger.debug(non_exists_resp.text)
     246 + self.logger.debug(non_exists_resp.status)
     247 + self.logger.debug(non_exists_resp_text)
    235 248   
    236  - a = exists_resp.text
    237  - b = non_exists_resp.text
     249 + a = exists_resp_text
     250 + b = non_exists_resp_text
    238 251   
    239 252   tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
    240 253   tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
    skipped 144 lines
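
    Note on the submit.py changes: blocking requests calls are replaced with a shared aiohttp ClientSession built once in __init__, optionally routed through a SOCKS/HTTP proxy and preloaded with cookies. A minimal sketch of that session setup, assuming aiohttp and aiohttp_socks are installed (cookie loading is simplified compared to import_aiohttp_cookies):

    import asyncio
    from typing import Optional

    from aiohttp import ClientSession, TCPConnector
    from aiohttp_socks import ProxyConnector

    async def make_session(proxy: Optional[str] = None) -> ClientSession:
        # Proxy connector when a proxy URL is given, plain TCP (no TLS verification) otherwise.
        connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
        return ClientSession(connector=connector, trust_env=True)

    async def main() -> None:
        async with await make_session() as session:
            resp = await session.get("https://example.com")
            print(resp.status, len(await resp.text()))

    asyncio.run(main())

    Reusing a single session is also why the per-request cookie handling in check_features_manually disappears in this diff: the cookie jar and proxy live on the session instead of being passed to every requests.get call.
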
  • utils/update_site_data.py
    skipped 24 lines
    25 25   '100000000': '100M',
    26 26  })
    27 27   
    28  -SEMAPHORE = threading.Semaphore(10)
     28 +SEMAPHORE = threading.Semaphore(20)
    29 29   
    30 30  def get_rank(domain_to_query, site, print_errors=True):
    31 31   with SEMAPHORE:
    skipped 113 lines
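
    The semaphore in utils/update_site_data.py now admits 20 concurrent rank lookups instead of 10. A small illustration of the pattern (the worker body is a placeholder, not the real get_rank):

    import threading
    import time

    SEMAPHORE = threading.Semaphore(20)

    def get_rank(domain: str) -> None:
        with SEMAPHORE:      # at most 20 threads run this block at the same time
            time.sleep(0.1)  # stand-in for the HTTP request that fetches the rank

    threads = [threading.Thread(target=get_rank, args=(f"site{i}.com",)) for i in range(100)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
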