| skipped 42 lines |
# NOTE(review): usage is not visible in this chunk — presumably characters
# that are rejected/stripped from query input; confirm against the caller.
BAD_CHARS = "#"
44 | 44 | | |
45 | 45 | | |
class SimpleAiohttpChecker:
    """HTTP checker backed by a single aiohttp ClientSession.

    Wraps session construction (optionally through a proxy), request
    preparation, and response/error classification for account checks.
    """

    def __init__(self, *args, **kwargs):
        # Accepted kwargs: proxy (str URL), cookie_jar (aiohttp cookie jar),
        # logger (defaults to a Mock so debug calls are safe no-ops).
        proxy = kwargs.get('proxy')
        cookie_jar = kwargs.get('cookie_jar')
        self.logger = kwargs.get('logger', Mock())

        # make http client session; route through the proxy when one is given
        connector = (
            ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
        )
        # NOTE(review): setting verify_ssl post-construction relies on a
        # deprecated aiohttp connector attribute — confirm against the pinned
        # aiohttp version.
        connector.verify_ssl = False
        self.session = aiohttp.ClientSession(
            connector=connector, trust_env=True, cookie_jar=cookie_jar
        )

    def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
        """Build (without awaiting) a GET or HEAD request coroutine.

        Any method value other than 'get' falls back to HEAD. The timeout is
        passed straight through to aiohttp.
        """
        if method == 'get':
            request_method = self.session.get
        else:
            request_method = self.session.head

        future = request_method(
            url=url,
            headers=headers,
            allow_redirects=allow_redirects,
            timeout=timeout,
        )

        return future

    async def close(self):
        """Close the underlying HTTP session."""
        await self.session.close()

    async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
        """Await a prepared request and classify the outcome.

        Returns a tuple (html_text, status_code, error) where error is None
        on success; html_text is always stringified (so "None" on failure).
        """
        html_text = None
        status_code = 0
        error: Optional[CheckError] = CheckError("Unknown")

        try:
            response = await future

            status_code = response.status
            response_content = await response.content.read()
            # fall back to utf-8 when the response declares no charset
            charset = response.charset or "utf-8"
            decoded_content = response_content.decode(charset, "ignore")
            html_text = decoded_content

            error = None
            if status_code == 0:
                error = CheckError("Connection lost")

            self.logger.debug(html_text)

        except asyncio.TimeoutError as e:
            error = CheckError("Request timeout", str(e))
        except ClientConnectorError as e:
            error = CheckError("Connecting failure", str(e))
        except ServerDisconnectedError as e:
            error = CheckError("Server disconnected", str(e))
        except aiohttp.http_exceptions.BadHttpMessage as e:
            error = CheckError("HTTP", str(e))
        except proxy_errors.ProxyError as e:
            error = CheckError("Proxy", str(e))
        except KeyboardInterrupt:
            # deliberately converted to a soft error so one interrupted
            # request does not abort the whole batch
            error = CheckError("Interrupted")
        except Exception as e:
            # ssl.SSLCertVerificationError only exists since Python 3.7 and is
            # a subclass of ssl.SSLError, so a single isinstance check behind
            # an explicit version guard covers both (previous code compared
            # sys.version_info.minor alone, which is fragile across majors).
            if sys.version_info >= (3, 7) and isinstance(e, ssl.SSLError):
                error = CheckError("SSL", str(e))
            else:
                self.logger.debug(e, exc_info=True)
                error = CheckError("Unexpected", str(e))

        return str(html_text), status_code, error
class TorAiohttpChecker(SimpleAiohttpChecker):
    """Checker whose session always goes through a (Tor) SOCKS proxy URL.

    Unlike the parent class, the proxy is mandatory: no clear-web fallback
    connector is created when it is missing.
    """

    def __init__(self, *args, **kwargs):
        self.logger = kwargs.get('logger', Mock())

        tor_connector = ProxyConnector.from_url(kwargs.get('proxy'))
        tor_connector.verify_ssl = False
        self.session = aiohttp.ClientSession(
            connector=tor_connector,
            trust_env=True,
            cookie_jar=kwargs.get('cookie_jar'),
        )
89 | 136 | | |
90 | 137 | | |
91 | 138 | | # TODO: move to separate class |
| skipped 230 lines |
322 | 369 | | # workaround to prevent slash errors |
323 | 370 | | url = re.sub("(?<!:)/+", "/", url) |
324 | 371 | | |
325 | | - | session = options['session'] |
| 372 | + | # always clearweb_checker for now |
| 373 | + | checker = options["checkers"][site.network] |
326 | 374 | | |
327 | 375 | | # site check is disabled |
328 | 376 | | if site.disabled and not options['forced']: |
| skipped 52 lines |
381 | 429 | | # In most cases when we are detecting by status code, |
382 | 430 | | # it is not necessary to get the entire body: we can |
383 | 431 | | # detect fine with just the HEAD response. |
384 | | - | request_method = session.head |
| 432 | + | request_method = 'head' |
385 | 433 | | else: |
386 | 434 | | # Either this detect method needs the content associated |
387 | 435 | | # with the GET response, or this specific website will |
388 | 436 | | # not respond properly unless we request the whole page. |
389 | | - | request_method = session.get |
| 437 | + | request_method = 'get' |
390 | 438 | | |
391 | 439 | | if site.check_type == "response_url": |
392 | 440 | | # Site forwards request to a different URL if username not |
| skipped 5 lines |
398 | 446 | | # The final result of the request will be what is available. |
399 | 447 | | allow_redirects = True |
400 | 448 | | |
401 | | - | future = request_method( |
| 449 | + | future = checker.prepare( |
| 450 | + | method=request_method, |
402 | 451 | | url=url_probe, |
403 | 452 | | headers=headers, |
404 | 453 | | allow_redirects=allow_redirects, |
| skipped 2 lines |
407 | 456 | | |
408 | 457 | | # Store future request object in the results object |
409 | 458 | | results_site["future"] = future |
| 459 | + | results_site["checker"] = checker |
410 | 460 | | |
411 | 461 | | return results_site |
412 | 462 | | |
| skipped 6 lines |
419 | 469 | | if not future: |
420 | 470 | | return site.name, default_result |
421 | 471 | | |
422 | | - | response = await get_response(request_future=future, logger=logger) |
| 472 | + | checker = default_result["checker"] |
| 473 | + | |
| 474 | + | response = await checker.check(future=future) |
423 | 475 | | |
424 | 476 | | response_result = process_site_result( |
425 | 477 | | response, query_notify, logger, default_result, site |
| skipped 4 lines |
430 | 482 | | return site.name, response_result |
431 | 483 | | |
432 | 484 | | |
433 | | - | async def debug_ip_request(session, logger): |
434 | | - | future = session.get(url="https://icanhazip.com") |
435 | | - | ip, status, check_error = await get_response(future, logger) |
| 485 | + | async def debug_ip_request(checker, logger): |
| 486 | + | future = checker.prepare(url="https://icanhazip.com") |
| 487 | + | ip, status, check_error = await checker.check(future) |
436 | 488 | | if ip: |
437 | 489 | | logger.debug(f"My IP is: {ip.strip()}") |
438 | 490 | | else: |
| skipped 17 lines |
456 | 508 | | logger, |
457 | 509 | | query_notify=None, |
458 | 510 | | proxy=None, |
| 511 | + | tor_proxy=None, |
459 | 512 | | timeout=3, |
460 | 513 | | is_parsing_enabled=False, |
461 | 514 | | id_type="username", |
| skipped 46 lines |
508 | 561 | | |
509 | 562 | | query_notify.start(username, id_type) |
510 | 563 | | |
511 | | - | # make http client session |
512 | | - | connector = ( |
513 | | - | ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False) |
514 | | - | ) |
515 | | - | connector.verify_ssl = False |
516 | | - | |
517 | 564 | | cookie_jar = None |
518 | 565 | | if cookies: |
519 | 566 | | logger.debug(f"Using cookies jar file {cookies}") |
520 | | - | cookie_jar = await import_aiohttp_cookies(cookies) |
| 567 | + | cookie_jar = import_aiohttp_cookies(cookies) |
521 | 568 | | |
522 | | - | session = aiohttp.ClientSession( |
523 | | - | connector=connector, trust_env=True, cookie_jar=cookie_jar |
| 569 | + | clearweb_checker = SimpleAiohttpChecker( |
| 570 | + | proxy=proxy, cookie_jar=cookie_jar, logger=logger |
524 | 571 | | ) |
525 | 572 | | |
| 573 | + | # TODO |
| 574 | + | tor_checker = Mock() |
| 575 | + | if tor_proxy: |
| 576 | + | tor_checker = TorAiohttpChecker( # type: ignore |
| 577 | + | proxy=tor_proxy, cookie_jar=cookie_jar, logger=logger |
| 578 | + | ) |
| 579 | + | |
526 | 580 | | if logger.level == logging.DEBUG: |
527 | | - | await debug_ip_request(session, logger) |
| 581 | + | await debug_ip_request(clearweb_checker, logger) |
528 | 582 | | |
529 | 583 | | # setup parallel executor |
530 | 584 | | executor: Optional[AsyncExecutor] = None |
| skipped 7 lines |
538 | 592 | | # make options objects for all the requests |
539 | 593 | | options: QueryOptions = {} |
540 | 594 | | options["cookies"] = cookie_jar |
541 | | - | options["session"] = session |
| 595 | + | options["checkers"] = { |
| 596 | + | '': clearweb_checker, |
| 597 | + | 'tor': tor_checker, |
| 598 | + | } |
542 | 599 | | options["parsing"] = is_parsing_enabled |
543 | 600 | | options["timeout"] = timeout |
544 | 601 | | options["id_type"] = id_type |
| skipped 46 lines |
591 | 648 | | ) |
592 | 649 | | |
593 | 650 | | # closing http client session |
594 | | - | await session.close() |
| 651 | + | await clearweb_checker.close() |
| 652 | + | if tor_proxy: |
| 653 | + | await tor_checker.close() |
595 | 654 | | |
596 | 655 | | # notify caller that all queries are finished |
597 | 656 | | query_notify.finish() |
| skipped 137 lines |