Projects STRLCPY maigret Commits 4ba7fcb1
🤬
  • ■ ■ ■ ■
    maigret/activation.py
    skipped 34 lines
    35 35   site.headers["authorization"] = f"Bearer {bearer_token}"
    36 36   
    37 37   
    38  -async def import_aiohttp_cookies(cookiestxt_filename):
     38 +def import_aiohttp_cookies(cookiestxt_filename):
    39 39   cookies_obj = MozillaCookieJar(cookiestxt_filename)
    40 40   cookies_obj.load(ignore_discard=True, ignore_expires=True)
    41 41   
    skipped 15 lines
  • ■ ■ ■ ■ ■ ■
    maigret/checking.py
    skipped 42 lines
    43 43  BAD_CHARS = "#"
    44 44   
    45 45   
    46  -async def get_response(request_future, logger) -> Tuple[str, int, Optional[CheckError]]:
    47  - html_text = None
    48  - status_code = 0
    49  - error: Optional[CheckError] = CheckError("Unknown")
     46 +class SimpleAiohttpChecker:
     47 + def __init__(self, *args, **kwargs):
     48 + proxy = kwargs.get('proxy')
     49 + cookie_jar = kwargs.get('cookie_jar')
     50 + self.logger = kwargs.get('logger', Mock())
     51 + 
     52 + # make http client session
     53 + connector = (
     54 + ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
     55 + )
     56 + connector.verify_ssl = False
     57 + self.session = aiohttp.ClientSession(
     58 + connector=connector, trust_env=True, cookie_jar=cookie_jar
     59 + )
     60 + 
     61 + def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
     62 + if method == 'get':
     63 + request_method = self.session.get
     64 + else:
     65 + request_method = self.session.head
     66 + 
     67 + future = request_method(
     68 + url=url,
     69 + headers=headers,
     70 + allow_redirects=allow_redirects,
     71 + timeout=timeout,
     72 + )
     73 + 
     74 + return future
     75 + 
     76 + async def close(self):
     77 + await self.session.close()
     78 + 
     79 + async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
     80 + html_text = None
     81 + status_code = 0
     82 + error: Optional[CheckError] = CheckError("Unknown")
     83 + 
     84 + try:
     85 + response = await future
     86 + 
     87 + status_code = response.status
     88 + response_content = await response.content.read()
     89 + charset = response.charset or "utf-8"
     90 + decoded_content = response_content.decode(charset, "ignore")
     91 + html_text = decoded_content
    50 92   
    51  - try:
    52  - response = await request_future
     93 + error = None
     94 + if status_code == 0:
     95 + error = CheckError("Connection lost")
    53 96   
    54  - status_code = response.status
    55  - response_content = await response.content.read()
    56  - charset = response.charset or "utf-8"
    57  - decoded_content = response_content.decode(charset, "ignore")
    58  - html_text = decoded_content
     97 + self.logger.debug(html_text)
     98 + 
     99 + except asyncio.TimeoutError as e:
     100 + error = CheckError("Request timeout", str(e))
     101 + except ClientConnectorError as e:
     102 + error = CheckError("Connecting failure", str(e))
     103 + except ServerDisconnectedError as e:
     104 + error = CheckError("Server disconnected", str(e))
     105 + except aiohttp.http_exceptions.BadHttpMessage as e:
     106 + error = CheckError("HTTP", str(e))
     107 + except proxy_errors.ProxyError as e:
     108 + error = CheckError("Proxy", str(e))
     109 + except KeyboardInterrupt:
     110 + error = CheckError("Interrupted")
     111 + except Exception as e:
     112 + # python-specific exceptions
     113 + if sys.version_info.minor > 6 and (
     114 + isinstance(e, ssl.SSLCertVerificationError)
     115 + or isinstance(e, ssl.SSLError)
     116 + ):
     117 + error = CheckError("SSL", str(e))
     118 + else:
     119 + self.logger.debug(e, exc_info=True)
     120 + error = CheckError("Unexpected", str(e))
    59 121   
    60  - error = None
    61  - if status_code == 0:
    62  - error = CheckError("Connection lost")
     122 + return str(html_text), status_code, error
    63 123   
    64  - logger.debug(html_text)
    65 124   
    66  - except asyncio.TimeoutError as e:
    67  - error = CheckError("Request timeout", str(e))
    68  - except ClientConnectorError as e:
    69  - error = CheckError("Connecting failure", str(e))
    70  - except ServerDisconnectedError as e:
    71  - error = CheckError("Server disconnected", str(e))
    72  - except aiohttp.http_exceptions.BadHttpMessage as e:
    73  - error = CheckError("HTTP", str(e))
    74  - except proxy_errors.ProxyError as e:
    75  - error = CheckError("Proxy", str(e))
    76  - except KeyboardInterrupt:
    77  - error = CheckError("Interrupted")
    78  - except Exception as e:
    79  - # python-specific exceptions
    80  - if sys.version_info.minor > 6 and (
    81  - isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError)
    82  - ):
    83  - error = CheckError("SSL", str(e))
    84  - else:
    85  - logger.debug(e, exc_info=True)
    86  - error = CheckError("Unexpected", str(e))
     125 +class TorAiohttpChecker(SimpleAiohttpChecker):
     126 + def __init__(self, *args, **kwargs):
     127 + proxy = kwargs.get('proxy')
     128 + cookie_jar = kwargs.get('cookie_jar')
     129 + self.logger = kwargs.get('logger', Mock())
    87 130   
    88  - return str(html_text), status_code, error
     131 + connector = ProxyConnector.from_url(proxy)
     132 + connector.verify_ssl = False
     133 + self.session = aiohttp.ClientSession(
     134 + connector=connector, trust_env=True, cookie_jar=cookie_jar
     135 + )
    89 136   
    90 137   
    91 138  # TODO: move to separate class
    skipped 230 lines
    322 369   # workaround to prevent slash errors
    323 370   url = re.sub("(?<!:)/+", "/", url)
    324 371   
    325  - session = options['session']
     372 + # pick the checker registered for this site's network ('' = clearweb, 'tor' = Tor)
     373 + checker = options["checkers"][site.network]
    326 374   
    327 375   # site check is disabled
    328 376   if site.disabled and not options['forced']:
    skipped 52 lines
    381 429   # In most cases when we are detecting by status code,
    382 430   # it is not necessary to get the entire body: we can
    383 431   # detect fine with just the HEAD response.
    384  - request_method = session.head
     432 + request_method = 'head'
    385 433   else:
    386 434   # Either this detect method needs the content associated
    387 435   # with the GET response, or this specific website will
    388 436   # not respond properly unless we request the whole page.
    389  - request_method = session.get
     437 + request_method = 'get'
    390 438   
    391 439   if site.check_type == "response_url":
    392 440   # Site forwards request to a different URL if username not
    skipped 5 lines
    398 446   # The final result of the request will be what is available.
    399 447   allow_redirects = True
    400 448   
    401  - future = request_method(
     449 + future = checker.prepare(
     450 + method=request_method,
    402 451   url=url_probe,
    403 452   headers=headers,
    404 453   allow_redirects=allow_redirects,
    skipped 2 lines
    407 456   
    408 457   # Store future request object in the results object
    409 458   results_site["future"] = future
     459 + results_site["checker"] = checker
    410 460   
    411 461   return results_site
    412 462   
    skipped 6 lines
    419 469   if not future:
    420 470   return site.name, default_result
    421 471   
    422  - response = await get_response(request_future=future, logger=logger)
     472 + checker = default_result["checker"]
     473 + 
     474 + response = await checker.check(future=future)
    423 475   
    424 476   response_result = process_site_result(
    425 477   response, query_notify, logger, default_result, site
    skipped 4 lines
    430 482   return site.name, response_result
    431 483   
    432 484   
    433  -async def debug_ip_request(session, logger):
    434  - future = session.get(url="https://icanhazip.com")
    435  - ip, status, check_error = await get_response(future, logger)
     485 +async def debug_ip_request(checker, logger):
     486 + future = checker.prepare(url="https://icanhazip.com")
     487 + ip, status, check_error = await checker.check(future)
    436 488   if ip:
    437 489   logger.debug(f"My IP is: {ip.strip()}")
    438 490   else:
    skipped 17 lines
    456 508   logger,
    457 509   query_notify=None,
    458 510   proxy=None,
     511 + tor_proxy=None,
    459 512   timeout=3,
    460 513   is_parsing_enabled=False,
    461 514   id_type="username",
    skipped 46 lines
    508 561   
    509 562   query_notify.start(username, id_type)
    510 563   
    511  - # make http client session
    512  - connector = (
    513  - ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
    514  - )
    515  - connector.verify_ssl = False
    516  - 
    517 564   cookie_jar = None
    518 565   if cookies:
    519 566   logger.debug(f"Using cookies jar file {cookies}")
    520  - cookie_jar = await import_aiohttp_cookies(cookies)
     567 + cookie_jar = import_aiohttp_cookies(cookies)
    521 568   
    522  - session = aiohttp.ClientSession(
    523  - connector=connector, trust_env=True, cookie_jar=cookie_jar
     569 + clearweb_checker = SimpleAiohttpChecker(
     570 + proxy=proxy, cookie_jar=cookie_jar, logger=logger
    524 571   )
    525 572   
     573 + # TODO: a Mock stands in for the Tor checker when no tor_proxy is given
     574 + tor_checker = Mock()
     575 + if tor_proxy:
     576 + tor_checker = TorAiohttpChecker( # type: ignore
     577 + proxy=tor_proxy, cookie_jar=cookie_jar, logger=logger
     578 + )
     579 + 
    526 580   if logger.level == logging.DEBUG:
    527  - await debug_ip_request(session, logger)
     581 + await debug_ip_request(clearweb_checker, logger)
    528 582   
    529 583   # setup parallel executor
    530 584   executor: Optional[AsyncExecutor] = None
    skipped 7 lines
    538 592   # make options objects for all the requests
    539 593   options: QueryOptions = {}
    540 594   options["cookies"] = cookie_jar
    541  - options["session"] = session
     595 + options["checkers"] = {
     596 + '': clearweb_checker,
     597 + 'tor': tor_checker,
     598 + }
    542 599   options["parsing"] = is_parsing_enabled
    543 600   options["timeout"] = timeout
    544 601   options["id_type"] = id_type
    skipped 46 lines
    591 648   )
    592 649   
    593 650   # closing http client session
    594  - await session.close()
     651 + await clearweb_checker.close()
     652 + if tor_proxy:
     653 + await tor_checker.close()
    595 654   
    596 655   # notify caller that all queries are finished
    597 656   query_notify.finish()
    skipped 137 lines
  • ■ ■ ■ ■ ■ ■
    maigret/maigret.py
    skipped 237 lines
    238 238   default=None,
    239 239   help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    240 240   )
     241 + parser.add_argument(
     242 + "--tor-proxy",
     243 + metavar='TOR_PROXY_URL',
     244 + action="store",
     245 + default='socks5://127.0.0.1:9050',
     246 + help="Specify URL of your Tor gateway. Default is socks5://127.0.0.1:9050",
     247 + )
    241 248   
    242 249   filter_group = parser.add_argument_group(
    243 250   'Site filtering', 'Options to set site search scope'
    skipped 340 lines
    584 591   site_dict=dict(sites_to_check),
    585 592   query_notify=query_notify,
    586 593   proxy=args.proxy,
     594 + tor_proxy=args.tor_proxy,
    587 595   timeout=args.timeout,
    588 596   is_parsing_enabled=parsing_enabled,
    589 597   id_type=id_type,
    skipped 82 lines
  • ■ ■ ■ ■ ■
    maigret/report.py
    skipped 280 lines
    281 281   data = dict(site_result)
    282 282   data["status"] = data["status"].json()
    283 283   data["site"] = data["site"].json
    284  - if "future" in data:
    285  - del data["future"]
     284 + for field in ["future", "checker"]:
     285 + if field in data:
     286 + del data[field]
    286 287   
    287 288   if is_report_per_line:
    288 289   data["sitename"] = sitename
    skipped 85 lines
  • ■ ■ ■ ■ ■ ■
    maigret/resources/data.json
    skipped 27743 lines
    27744 27744   "tags": [
    27745 27745   "business"
    27746 27746   ]
     27747 + },
     27748 + "HiddenAnswers": {
     27749 + "tags": [
     27750 + "tor"
     27751 + ],
     27752 + "network": "tor",
     27753 + "url": "http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion/user/{username}",
     27754 + "urlMain": "http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion",
     27755 + "usernameClaimed": "theredqueen",
     27756 + "usernameUnclaimed": "noonewouldeverusethis7",
     27757 + "checkType": "message",
     27758 + "absenceStrs": [
     27759 + "Page not found"
     27760 + ],
     27761 + "presenseStrs": [
     27762 + "qa-part-form-profile"
     27763 + ]
    27747 27764   }
    27748 27765   },
    27749 27766   "engines": {
    skipped 205 lines
  • ■ ■ ■ ■ ■ ■
    maigret/sites.py
    skipped 64 lines
    65 65   "review",
    66 66   "bookmarks",
    67 67   "design",
     68 + "tor",
    68 69  ]
    69 70   
    70 71   
    skipped 50 lines
    121 122   request_future = None
    122 123   alexa_rank = None
    123 124   source = None
     125 + 
     126 + network = ''
    124 127   
    125 128   def __init__(self, name, information):
    126 129   self.name = name
    skipped 360 lines
  • ■ ■ ■ ■
    tests/test_activation.py
    skipped 39 lines
    40 40   with open(cookies_filename, 'w') as f:
    41 41   f.write(COOKIES_TXT)
    42 42   
    43  - cookie_jar = await import_aiohttp_cookies(cookies_filename)
     43 + cookie_jar = import_aiohttp_cookies(cookies_filename)
    44 44   assert list(cookie_jar._cookies.keys()) == ['xss.is', 'httpbin.org']
    45 45   
    46 46   url = 'https://httpbin.org/cookies'
    skipped 11 lines
  • ■ ■ ■ ■ ■ ■
    tests/test_checking.py
    skipped 8 lines
    9 9   server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
    10 10   
    11 11   
     12 +@pytest.mark.slow
    12 13  @pytest.mark.asyncio
    13 14  async def test_checking_by_status_code(httpserver, local_test_db):
    14 15   sites_dict = local_test_db.sites_dict
    skipped 8 lines
    23 24   assert result['StatusCode']['status'].is_found() is False
    24 25   
    25 26   
     27 +@pytest.mark.slow
    26 28  @pytest.mark.asyncio
    27 29  async def test_checking_by_message_positive_full(httpserver, local_test_db):
    28 30   sites_dict = local_test_db.sites_dict
    skipped 8 lines
    37 39   assert result['Message']['status'].is_found() is False
    38 40   
    39 41   
     42 +@pytest.mark.slow
    40 43  @pytest.mark.asyncio
    41 44  async def test_checking_by_message_positive_part(httpserver, local_test_db):
    42 45   sites_dict = local_test_db.sites_dict
    skipped 8 lines
    51 54   assert result['Message']['status'].is_found() is False
    52 55   
    53 56   
     57 +@pytest.mark.slow
    54 58  @pytest.mark.asyncio
    55 59  async def test_checking_by_message_negative(httpserver, local_test_db):
    56 60   sites_dict = local_test_db.sites_dict
    skipped 10 lines
  • ■ ■ ■ ■ ■
    tests/test_cli.py
    skipped 30 lines
    31 31   'stats': False,
    32 32   'tags': '',
    33 33   'timeout': 30,
     34 + 'tor_proxy': 'socks5://127.0.0.1:9050',
    34 35   'top_sites': 500,
    35 36   'txt': False,
    36 37   'use_disabled_sites': False,
    skipped 58 lines
  • ■ ■ ■ ■ ■
    tests/test_maigret.py
    skipped 137 lines
    138 138   
    139 139   assert results['Reddit'].get('future') is None
    140 140   del results['GooglePlayStore']['future']
     141 + del results['GooglePlayStore']['checker']
    141 142   
    142 143   assert results == RESULTS_EXAMPLE
    143 144   
    skipped 35 lines
Please wait...
Page is in error, reload to recover