maigret · commit 5ee91f66
  • Introduced `--retries` flag, performed a thorough refactoring, updated the sites list, added linting of test scripts

  • Soxoj committed 3 years ago
    5ee91f66
    1 parent 7fd4a2c5
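The headline change is the retry mechanism: the CLI gains a `--retries` flag (default 1), and `maigret.checking.maigret()` gains a matching `retries=` argument that restarts requests which failed with a temporary error (see `TEMPORARY_ERRORS_TYPES` in `maigret/errors.py`). Below is a minimal, hypothetical usage sketch of the Python API, built from the test fixtures in this commit; the example site database and username are illustrative, and the function's default timeout and identifier type are assumed to be acceptable.

```python
import asyncio
import logging

from maigret.checking import maigret as maigret_search
from maigret.sites import MaigretDatabase

# Tiny site database mirroring tests/test_maigret.py; a real run would
# load the full maigret/resources/data.json instead.
EXAMPLE_DB = {
    'engines': {},
    'sites': {
        "GooglePlayStore": {
            "tags": ["global", "us"],
            "disabled": False,
            "checkType": "status_code",
            "alexaRank": 1,
            "url": "https://play.google.com/store/apps/developer?id={username}",
            "urlMain": "https://play.google.com/store",
            "usernameClaimed": "Facebook_nosuchname",
            "usernameUnclaimed": "noonewouldeverusethis7",
        },
    },
}


async def run():
    logger = logging.getLogger("maigret-retries-example")
    db = MaigretDatabase()
    db.load_from_json(EXAMPLE_DB)
    site_dict = {site.name: site for site in db.sites}

    # retries=2 allows up to two extra rounds of checks for sites whose
    # requests failed with a temporary error (timeouts, lost connections,
    # proxy errors); permanent errors are not retried (errors.is_permanent).
    results = await maigret_search(
        username="Facebook_nosuchname",
        site_dict=site_dict,
        logger=logger,
        no_progressbar=True,
        retries=2,
    )

    for sitename, wrapper in results.items():
        print(sitename, wrapper.get("status"))


asyncio.run(run())
```

The equivalent CLI invocation would be something like `./maigret.py <username> --retries 2`.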
  • README.md
    skipped 25 lines
    26 26  * Search by tags (site categories, countries)
    27 27  * Censorship and captcha detection
    28 28  * Very few false positives
     29 +* Restarts of failed requests
    29 30   
    30 31  ## Installation
    31 32   
    skipped 17 lines
    49 50  git clone https://github.com/soxoj/maigret && cd maigret
    50 51  ```
    51 52   
    52  -You can use your a free virtual machine, the repo will be automatically cloned:
     53 +You can use a free virtual machine, the repo will be automatically cloned:
    53 54   
    54 55  [![Open in Cloud Shell](https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md) [![Run on Repl.it](https://user-images.githubusercontent.com/27065646/92304596-bf719b00-ef7f-11ea-987f-2c1f3c323088.png)](https://repl.it/github/soxoj/maigret)
    55 56  <a href="https://colab.research.google.com/gist//soxoj/879b51bc3b2f8b695abb054090645000/maigret.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="40"></a>
    skipped 59 lines
  • format.sh
    1 1  #!/bin/sh
    2  -FILES="maigret wizard.py maigret.py"
     2 +FILES="maigret wizard.py maigret.py tests"
    3 3   
    4 4  echo 'black'
    5 5  black --skip-string-normalization $FILES
  • lint.sh
    1 1  #!/bin/sh
    2  -FILES="maigret wizard.py maigret.py"
     2 +FILES="maigret wizard.py maigret.py tests"
    3 3   
    4 4  echo 'syntax errors or undefined names'
    5 5  flake8 --count --select=E9,F63,F7,F82 --show-source --statistics $FILES
    skipped 2 lines
    8 8  flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503 $FILES
    9 9   
    10 10  echo 'mypy'
    11  -mypy ./maigret
     11 +mypy ./maigret ./wizard.py ./tests
  • maigret/checking.py
    skipped 4 lines
    5 5  import ssl
    6 6  import sys
    7 7  import tqdm
    8  -from typing import Tuple, Optional
     8 +from typing import Tuple, Optional, Dict, List
    9 9   
    10 10  import aiohttp
    11 11  import tqdm.asyncio
    skipped 4 lines
    16 16  from .activation import ParsingActivator, import_aiohttp_cookies
    17 17  from . import errors
    18 18  from .errors import CheckError
    19  -from .executors import AsyncioSimpleExecutor, AsyncioProgressbarQueueExecutor
     19 +from .executors import (
     20 + AsyncExecutor,
     21 + AsyncioSimpleExecutor,
     22 + AsyncioProgressbarQueueExecutor,
     23 +)
    20 24  from .result import QueryResult, QueryStatus
    21 25  from .sites import MaigretDatabase, MaigretSite
     26 +from .types import QueryOptions, QueryResultWrapper
    22 27  from .utils import get_random_user_agent
    23 28   
    24 29   
    skipped 10 lines
    35 40  unsupported_characters = "#"
    36 41   
    37 42   
    38  -async def get_response(
    39  - request_future, site_name, logger
    40  -) -> Tuple[str, int, Optional[CheckError]]:
     43 +async def get_response(request_future, logger) -> Tuple[str, int, Optional[CheckError]]:
    41 44   html_text = None
    42 45   status_code = 0
    43  - error: Optional[CheckError] = CheckError("Error")
     46 + error: Optional[CheckError] = CheckError("Unknown")
    44 47   
    45 48   try:
    46 49   response = await request_future
    skipped 29 lines
    76 79   ):
    77 80   error = CheckError("SSL", str(e))
    78 81   else:
    79  - logger.warning(f"Unhandled error while requesting {site_name}: {e}")
    80 82   logger.debug(e, exc_info=True)
    81  - error = CheckError("Error", str(e))
     83 + error = CheckError("Unexpected", str(e))
    82 84   
    83  - # TODO: return only needed information
    84 85   return str(html_text), status_code, error
    85 86   
    86 87   
    87  -async def update_site_dict_from_response(
    88  - sitename, site_dict, results_info, logger, query_notify
    89  -):
    90  - site_obj = site_dict[sitename]
    91  - future = site_obj.request_future
    92  - if not future:
    93  - # ignore: search by incompatible id type
    94  - return
    95  - 
    96  - response = await get_response(
    97  - request_future=future, site_name=sitename, logger=logger
    98  - )
    99  - 
    100  - return sitename, process_site_result(
    101  - response, query_notify, logger, results_info, site_obj
    102  - )
    103  - 
    104  - 
    105 88  # TODO: move to separate class
    106 89  def detect_error_page(
    107 90   html_text, status_code, fail_flags, ignore_403
    skipped 19 lines
    127 110   
    128 111   
    129 112  def process_site_result(
    130  - response, query_notify, logger, results_info, site: MaigretSite
     113 + response, query_notify, logger, results_info: QueryResultWrapper, site: MaigretSite
    131 114  ):
    132 115   if not response:
    133 116   return results_info
    skipped 71 lines
    205 188   logger.debug(presense_flag)
    206 189   break
    207 190   
     191 + def build_result(status, **kwargs):
     192 + return QueryResult(
     193 + username,
     194 + site_name,
     195 + url,
     196 + status,
     197 + query_time=response_time,
     198 + tags=fulltags,
     199 + **kwargs,
     200 + )
     201 + 
    208 202   if check_error:
    209 203   logger.debug(check_error)
    210 204   result = QueryResult(
    skipped 7 lines
    218 212   tags=fulltags,
    219 213   )
    220 214   elif check_type == "message":
    221  - absence_flags = site.absence_strs
    222  - is_absence_flags_list = isinstance(absence_flags, list)
    223  - absence_flags_set = (
    224  - set(absence_flags) if is_absence_flags_list else {absence_flags}
    225  - )
    226 215   # Checks if the error message is in the HTML
    227 216   is_absence_detected = any(
    228  - [(absence_flag in html_text) for absence_flag in absence_flags_set]
     217 + [(absence_flag in html_text) for absence_flag in site.absence_strs]
    229 218   )
    230 219   if not is_absence_detected and is_presense_detected:
    231  - result = QueryResult(
    232  - username,
    233  - site_name,
    234  - url,
    235  - QueryStatus.CLAIMED,
    236  - query_time=response_time,
    237  - tags=fulltags,
    238  - )
     220 + result = build_result(QueryStatus.CLAIMED)
    239 221   else:
    240  - result = QueryResult(
    241  - username,
    242  - site_name,
    243  - url,
    244  - QueryStatus.AVAILABLE,
    245  - query_time=response_time,
    246  - tags=fulltags,
    247  - )
     222 + result = build_result(QueryStatus.AVAILABLE)
    248 223   elif check_type == "status_code":
    249 224   # Checks if the status code of the response is 2XX
    250  - if (not status_code >= 300 or status_code < 200) and is_presense_detected:
    251  - result = QueryResult(
    252  - username,
    253  - site_name,
    254  - url,
    255  - QueryStatus.CLAIMED,
    256  - query_time=response_time,
    257  - tags=fulltags,
    258  - )
     225 + if is_presense_detected and (not status_code >= 300 or status_code < 200):
     226 + result = build_result(QueryStatus.CLAIMED)
    259 227   else:
    260  - result = QueryResult(
    261  - username,
    262  - site_name,
    263  - url,
    264  - QueryStatus.AVAILABLE,
    265  - query_time=response_time,
    266  - tags=fulltags,
    267  - )
     228 + result = build_result(QueryStatus.AVAILABLE)
    268 229   elif check_type == "response_url":
    269 230   # For this detection method, we have turned off the redirect.
    270 231   # So, there is no need to check the response URL: it will always
    skipped 1 lines
    272 233   # code indicates that the request was successful (i.e. no 404, or
    273 234   # forward to some odd redirect).
    274 235   if 200 <= status_code < 300 and is_presense_detected:
    275  - result = QueryResult(
    276  - username,
    277  - site_name,
    278  - url,
    279  - QueryStatus.CLAIMED,
    280  - query_time=response_time,
    281  - tags=fulltags,
    282  - )
     236 + result = build_result(QueryStatus.CLAIMED)
    283 237   else:
    284  - result = QueryResult(
    285  - username,
    286  - site_name,
    287  - url,
    288  - QueryStatus.AVAILABLE,
    289  - query_time=response_time,
    290  - tags=fulltags,
    291  - )
     238 + result = build_result(QueryStatus.AVAILABLE)
    292 239   else:
    293 240   # It should be impossible to ever get here...
    294 241   raise ValueError(
    skipped 34 lines
    329 276   return results_info
    330 277   
    331 278   
     279 +def make_site_result(
     280 + site: MaigretSite, username: str, options: QueryOptions, logger
     281 +) -> QueryResultWrapper:
     282 + results_site: QueryResultWrapper = {}
     283 + 
     284 + # Record URL of main site and username
     285 + results_site["site"] = site
     286 + results_site["username"] = username
     287 + results_site["parsing_enabled"] = options["parsing"]
     288 + results_site["url_main"] = site.url_main
     289 + results_site["cookies"] = (
     290 + options.get("cookie_jar")
     291 + and options["cookie_jar"].filter_cookies(site.url_main)
     292 + or None
     293 + )
     294 + 
     295 + headers = {
     296 + "User-Agent": get_random_user_agent(),
     297 + }
     298 + 
     299 + headers.update(site.headers)
     300 + 
     301 + if "url" not in site.__dict__:
     302 + logger.error("No URL for site %s", site.name)
     303 + 
     304 + # URL of user on site (if it exists)
     305 + url = site.url.format(
     306 + urlMain=site.url_main, urlSubpath=site.url_subpath, username=username
     307 + )
     308 + 
     309 + # workaround to prevent slash errors
     310 + url = re.sub("(?<!:)/+", "/", url)
     311 + 
     312 + session = options['session']
     313 + 
     314 + # site check is disabled
     315 + if site.disabled and not options['forced']:
     316 + logger.debug(f"Site {site.name} is disabled, skipping...")
     317 + results_site["status"] = QueryResult(
     318 + username,
     319 + site.name,
     320 + url,
     321 + QueryStatus.ILLEGAL,
     322 + error=CheckError("Check is disabled"),
     323 + )
     324 + # current username type could not be applied
     325 + elif site.type != options["id_type"]:
     326 + results_site["status"] = QueryResult(
     327 + username,
     328 + site.name,
     329 + url,
     330 + QueryStatus.ILLEGAL,
     331 + error=CheckError('Unsupported identifier type', f'Want "{site.type}"'),
     332 + )
     333 + # username is not allowed.
     334 + elif site.regex_check and re.search(site.regex_check, username) is None:
     335 + results_site["status"] = QueryResult(
     336 + username,
     337 + site.name,
     338 + url,
     339 + QueryStatus.ILLEGAL,
     340 + error=CheckError(
     341 + 'Unsupported username format', f'Want "{site.regex_check}"'
     342 + ),
     343 + )
     344 + results_site["url_user"] = ""
     345 + results_site["http_status"] = ""
     346 + results_site["response_text"] = ""
     347 + # query_notify.update(results_site["status"])
     348 + else:
     349 + # URL of user on site (if it exists)
     350 + results_site["url_user"] = url
     351 + url_probe = site.url_probe
     352 + if url_probe is None:
     353 + # Probe URL is normal one seen by people out on the web.
     354 + url_probe = url
     355 + else:
     356 + # There is a special URL for probing existence separate
     357 + # from where the user profile normally can be found.
     358 + url_probe = url_probe.format(
     359 + urlMain=site.url_main,
     360 + urlSubpath=site.url_subpath,
     361 + username=username,
     362 + )
     363 + 
     364 + for k, v in site.get_params.items():
     365 + url_probe += f"&{k}={v}"
     366 + 
     367 + if site.check_type == "status_code" and site.request_head_only:
     368 + # In most cases when we are detecting by status code,
     369 + # it is not necessary to get the entire body: we can
     370 + # detect fine with just the HEAD response.
     371 + request_method = session.head
     372 + else:
     373 + # Either this detect method needs the content associated
     374 + # with the GET response, or this specific website will
     375 + # not respond properly unless we request the whole page.
     376 + request_method = session.get
     377 + 
     378 + if site.check_type == "response_url":
     379 + # Site forwards request to a different URL if username not
     380 + # found. Disallow the redirect so we can capture the
     381 + # http status from the original URL request.
     382 + allow_redirects = False
     383 + else:
     384 + # Allow whatever redirect that the site wants to do.
     385 + # The final result of the request will be what is available.
     386 + allow_redirects = True
     387 + 
     388 + future = request_method(
     389 + url=url_probe,
     390 + headers=headers,
     391 + allow_redirects=allow_redirects,
     392 + timeout=options['timeout'],
     393 + )
     394 + 
     395 + # Store future request object in the results object
     396 + results_site["future"] = future
     397 + 
     398 + return results_site
     399 + 
     400 + 
     401 +async def check_site_for_username(
     402 + site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
     403 +) -> Tuple[str, QueryResultWrapper]:
     404 + default_result = make_site_result(site, username, options, logger)
     405 + future = default_result.get("future")
     406 + if not future:
     407 + return site.name, default_result
     408 + 
     409 + response = await get_response(request_future=future, logger=logger)
     410 + 
     411 + response_result = process_site_result(
     412 + response, query_notify, logger, default_result, site
     413 + )
     414 + 
     415 + return site.name, response_result
     416 + 
     417 + 
     418 +async def debug_ip_request(session, logger):
     419 + future = session.get(url="https://icanhazip.com")
     420 + ip, status, check_error = await get_response(future, logger)
     421 + if ip:
     422 + logger.debug(f"My IP is: {ip.strip()}")
     423 + else:
     424 + logger.debug(f"IP requesting {check_error.type}: {check_error.desc}")
     425 + 
     426 + 
     427 +def get_failed_sites(results: Dict[str, QueryResultWrapper]) -> List[str]:
     428 + sites = []
     429 + for sitename, r in results.items():
     430 + status = r.get('status', {})
     431 + if status and status.error:
     432 + if errors.is_permanent(status.error.type):
     433 + continue
     434 + sites.append(sitename)
     435 + return sites
     436 + 
     437 + 
    332 438  async def maigret(
    333  - username,
    334  - site_dict,
     439 + username: str,
     440 + site_dict: Dict[str, MaigretSite],
    335 441   logger,
    336 442   query_notify=None,
    337 443   proxy=None,
    skipped 5 lines
    343 449   max_connections=100,
    344 450   no_progressbar=False,
    345 451   cookies=None,
    346  -):
     452 + retries=0,
     453 +) -> QueryResultWrapper:
    347 454   """Main search func
    348 455   
    349 456   Checks for existence of username on certain sites.
    350 457   
    351 458   Keyword Arguments:
    352 459   username -- Username string will be used for search.
    353  - site_dict -- Dictionary containing sites data.
     460 + site_dict -- Dictionary containing sites data in MaigretSite objects.
    354 461   query_notify -- Object with base type of QueryNotify().
    355 462   This will be used to notify the caller about
    356 463   query results.
    skipped 23 lines
    380 487   there was an HTTP error when checking for existence.
    381 488   """
    382 489   
    383  - # Notify caller that we are starting the query.
     490 + # notify caller that we are starting the query.
    384 491   if not query_notify:
    385 492   query_notify = Mock()
    386 493   
    387 494   query_notify.start(username, id_type)
    388 495   
    389  - # TODO: connector
     496 + # make http client session
    390 497   connector = (
    391 498   ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
    392 499   )
    393  - # connector = aiohttp.TCPConnector(ssl=False)
    394 500   connector.verify_ssl = False
    395 501   
    396 502   cookie_jar = None
    skipped 6 lines
    403 509   )
    404 510   
    405 511   if logger.level == logging.DEBUG:
    406  - future = session.get(url="https://icanhazip.com")
    407  - ip, status, check_error = await get_response(future, None, logger)
    408  - if ip:
    409  - logger.debug(f"My IP is: {ip.strip()}")
    410  - else:
    411  - logger.debug(f"IP requesting {check_error[0]}: {check_error[1]}")
    412  - 
    413  - # Results from analysis of all sites
    414  - results_total = {}
     512 + await debug_ip_request(session, logger)
    415 513   
    416  - # First create futures for all requests. This allows for the requests to run in parallel
    417  - for site_name, site in site_dict.items():
     514 + # setup parallel executor
     515 + executor: Optional[AsyncExecutor] = None
     516 + if no_progressbar:
     517 + executor = AsyncioSimpleExecutor(logger=logger)
     518 + else:
     519 + executor = AsyncioProgressbarQueueExecutor(
     520 + logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
     521 + )
    418 522   
    419  - if site.type != id_type:
    420  - continue
     523 + # make options objects for all the requests
     524 + options: QueryOptions = {}
     525 + options["cookies"] = cookie_jar
     526 + options["session"] = session
     527 + options["parsing"] = is_parsing_enabled
     528 + options["timeout"] = timeout
     529 + options["id_type"] = id_type
     530 + options["forced"] = forced
    421 531   
    422  - if site.disabled and not forced:
    423  - logger.debug(f"Site {site.name} is disabled, skipping...")
    424  - continue
     532 + # results from analysis of all sites
     533 + all_results: Dict[str, QueryResultWrapper] = {}
    425 534   
    426  - # Results from analysis of this specific site
    427  - results_site = {}
     535 + sites = list(site_dict.keys())
    428 536   
    429  - # Record URL of main site and username
    430  - results_site["username"] = username
    431  - results_site["parsing_enabled"] = is_parsing_enabled
    432  - results_site["url_main"] = site.url_main
    433  - results_site["cookies"] = (
    434  - cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
    435  - )
     537 + attempts = retries + 1
     538 + while attempts:
     539 + tasks_dict = {}
    436 540   
    437  - headers = {
    438  - "User-Agent": get_random_user_agent(),
    439  - }
    440  - 
    441  - headers.update(site.headers)
    442  - 
    443  - if "url" not in site.__dict__:
    444  - logger.error("No URL for site %s", site.name)
    445  - # URL of user on site (if it exists)
    446  - url = site.url.format(
    447  - urlMain=site.url_main, urlSubpath=site.url_subpath, username=username
    448  - )
    449  - # workaround to prevent slash errors
    450  - url = re.sub("(?<!:)/+", "/", url)
    451  - 
    452  - # Don't make request if username is invalid for the site
    453  - if site.regex_check and re.search(site.regex_check, username) is None:
    454  - # No need to do the check at the site: this user name is not allowed.
    455  - results_site["status"] = QueryResult(
    456  - username, site_name, url, QueryStatus.ILLEGAL
     541 + for sitename, site in site_dict.items():
     542 + if sitename not in sites:
     543 + continue
     544 + default_result: QueryResultWrapper = {
     545 + 'site': site,
     546 + 'status': QueryResult(
     547 + username,
     548 + sitename,
     549 + '',
     550 + QueryStatus.UNKNOWN,
     551 + error=CheckError('Request failed'),
     552 + ),
     553 + }
     554 + tasks_dict[sitename] = (
     555 + check_site_for_username,
     556 + [site, username, options, logger, query_notify],
     557 + {'default': (sitename, default_result)},
    457 558   )
    458  - results_site["url_user"] = ""
    459  - results_site["http_status"] = ""
    460  - results_site["response_text"] = ""
    461  - query_notify.update(results_site["status"])
    462  - else:
    463  - # URL of user on site (if it exists)
    464  - results_site["url_user"] = url
    465  - url_probe = site.url_probe
    466  - if url_probe is None:
    467  - # Probe URL is normal one seen by people out on the web.
    468  - url_probe = url
    469  - else:
    470  - # There is a special URL for probing existence separate
    471  - # from where the user profile normally can be found.
    472  - url_probe = url_probe.format(
    473  - urlMain=site.url_main,
    474  - urlSubpath=site.url_subpath,
    475  - username=username,
    476  - )
    477 559   
    478  - for k, v in site.get_params.items():
    479  - url_probe += f"&{k}={v}"
     560 + cur_results = await executor.run(tasks_dict.values())
    480 561   
    481  - if site.check_type == "status_code" and site.request_head_only:
    482  - # In most cases when we are detecting by status code,
    483  - # it is not necessary to get the entire body: we can
    484  - # detect fine with just the HEAD response.
    485  - request_method = session.head
    486  - else:
    487  - # Either this detect method needs the content associated
    488  - # with the GET response, or this specific website will
    489  - # not respond properly unless we request the whole page.
    490  - request_method = session.get
     562 + # wait for executor timeout errors
     563 + await asyncio.sleep(1)
    491 564   
    492  - if site.check_type == "response_url":
    493  - # Site forwards request to a different URL if username not
    494  - # found. Disallow the redirect so we can capture the
    495  - # http status from the original URL request.
    496  - allow_redirects = False
    497  - else:
    498  - # Allow whatever redirect that the site wants to do.
    499  - # The final result of the request will be what is available.
    500  - allow_redirects = True
     565 + all_results.update(cur_results)
    501 566   
    502  - future = request_method(
    503  - url=url_probe,
    504  - headers=headers,
    505  - allow_redirects=allow_redirects,
    506  - timeout=timeout,
    507  - )
     567 + sites = get_failed_sites(dict(cur_results))
     568 + attempts -= 1
    508 569   
    509  - # Store future in data for access later
    510  - # TODO: move to separate obj
    511  - site.request_future = future
     570 + if not sites:
     571 + break
    512 572   
    513  - # Add this site's results into final dictionary with all of the other results.
    514  - results_total[site_name] = results_site
    515  - 
    516  - coroutines = []
    517  - for sitename, result_obj in results_total.items():
    518  - coroutines.append(
    519  - (
    520  - update_site_dict_from_response,
    521  - [sitename, site_dict, result_obj, logger, query_notify],
    522  - {},
     573 + if attempts:
     574 + query_notify.warning(
     575 + f'Restarting checks for {len(sites)} sites... ({attempts} attempts left)'
    523 576   )
    524  - )
    525 577   
    526  - if no_progressbar:
    527  - executor = AsyncioSimpleExecutor(logger=logger)
    528  - else:
    529  - executor = AsyncioProgressbarQueueExecutor(
    530  - logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
    531  - )
    532  - 
    533  - results = await executor.run(coroutines)
    534  - 
     578 + # closing http client session
    535 579   await session.close()
    536 580   
    537  - # Notify caller that all queries are finished.
     581 + # notify caller that all queries are finished
    538 582   query_notify.finish()
    539 583   
    540  - data = {}
    541  - for result in results:
    542  - # TODO: still can be empty
    543  - if result:
    544  - try:
    545  - data[result[0]] = result[1]
    546  - except Exception as e:
    547  - logger.error(e, exc_info=True)
    548  - logger.info(result)
    549  - 
    550  - return data
     584 + return all_results
    551 585   
    552 586   
    553 587  def timeout_check(value):
    skipped 21 lines
    575 609   return timeout
    576 610   
    577 611   
    578  -async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
     612 +async def site_self_check(
     613 + site: MaigretSite, logger, semaphore, db: MaigretDatabase, silent=False
     614 +):
    579 615   changes = {
    580 616   "disabled": False,
    581 617   }
    skipped 20 lines
    602 638   id_type=site.type,
    603 639   forced=True,
    604 640   no_progressbar=True,
     641 + retries=1,
    605 642   )
    606 643   
    607 644   # don't disable entries with other ids types
    skipped 80 lines
  • maigret/errors.py
    skipped 56 lines
    57 57   'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
    58 58  }
    59 59   
     60 +TEMPORARY_ERRORS_TYPES = [
     61 + 'Request timeout',
     62 + 'Unknown',
     63 + 'Request failed',
     64 + 'Connecting failure',
     65 + 'HTTP',
     66 + 'Proxy',
     67 + 'Interrupted',
     68 + 'Connection lost',
     69 +]
     70 + 
    60 71  THRESHOLD = 3 # percent
    61 72   
    62 73   
    skipped 1 lines
    64 75   return err_data['perc'] >= THRESHOLD
    65 76   
    66 77   
    67  -def is_not_permanent(err_data):
    68  - return True
     78 +def is_permanent(err_type):
     79 + return err_type not in TEMPORARY_ERRORS_TYPES
    69 80   
    70 81   
    71 82  def detect(text):
    skipped 34 lines
  • maigret/executors.py
    skipped 92 lines
    93 93   try:
    94 94   result = await asyncio.wait_for(query_task, timeout=self.timeout)
    95 95   except asyncio.TimeoutError:
    96  - result = None
     96 + result = kwargs.get('default')
    97 97   
    98 98   self.results.append(result)
    99 99   self.progress.update(1)
    skipped 20 lines
  • maigret/maigret.py
    skipped 58 lines
    59 59   )
    60 60   
    61 61   
    62  -async def main():
     62 +def setup_arguments_parser():
    63 63   version_string = '\n'.join(
    64 64   [
    65 65   f'%(prog)s {__version__}',
    skipped 83 lines
    149 149   "On the other hand, this may cause a long delay to gather all results. ",
    150 150   )
    151 151   parser.add_argument(
     152 + "--retries",
     153 + action="store",
     154 + type=int,
     155 + metavar='RETRIES',
     156 + default=1,
     157 + help="Number of attempts to restart temporarily failed requests.",
     158 + )
     159 + parser.add_argument(
    152 160   "-n",
    153 161   "--max-connections",
    154 162   action="store",
    skipped 179 lines
    334 342   help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
    335 343   " (one report per username).",
    336 344   )
     345 + return parser
    337 346   
    338  - args = parser.parse_args()
     347 + 
     348 +async def main():
     349 + arg_parser = setup_arguments_parser()
     350 + args = arg_parser.parse_args()
    339 351   
    340 352   # Logging
    341 353   log_level = logging.ERROR
    skipped 186 lines
    528 540   forced=args.use_disabled_sites,
    529 541   max_connections=args.connections,
    530 542   no_progressbar=args.no_progressbar,
     543 + retries=args.retries,
    531 544   )
    532 545   
    533 546   notify_about_errors(results, query_notify)
    skipped 79 lines
  • maigret/resources/data.json
    Diff is too large to be displayed.
  • maigret/sites.py
    skipped 2 lines
    3 3  import copy
    4 4  import json
    5 5  import sys
    6  -from typing import Optional
     6 +from typing import Optional, List, Dict, Any
    7 7   
    8 8  import requests
    9 9   
    skipped 47 lines
    57 57   
    58 58   
    59 59  class MaigretEngine:
     60 + site: Dict[str, Any] = {}
     61 + 
    60 62   def __init__(self, name, data):
    61 63   self.name = name
    62  - self.site = {}
    63 64   self.__dict__.update(data)
    64 65   
    65 66   @property
    skipped 12 lines
    78 79   "urlRegexp",
    79 80   ]
    80 81   
    81  - def __init__(self, name, information):
    82  - self.name = name
     82 + username_claimed = ""
     83 + username_unclaimed = ""
     84 + url_subpath = ""
     85 + url_main = ""
     86 + url = ""
     87 + disabled = False
     88 + similar_search = False
     89 + ignore403 = False
     90 + tags: List[str] = []
    83 91   
    84  - self.disabled = False
    85  - self.similar_search = False
    86  - self.ignore403 = False
    87  - self.tags = []
     92 + type = "username"
     93 + headers: Dict[str, str] = {}
     94 + errors: Dict[str, str] = {}
     95 + activation: Dict[str, Any] = {}
     96 + regex_check = None
     97 + url_probe = None
     98 + check_type = ""
     99 + request_head_only = ""
     100 + get_params: Dict[str, Any] = {}
    88 101   
    89  - self.type = "username"
    90  - self.headers = {}
    91  - self.errors = {}
    92  - self.activation = {}
    93  - self.url_subpath = ""
    94  - self.regex_check = None
    95  - self.url_probe = None
    96  - self.check_type = ""
    97  - self.request_head_only = ""
    98  - self.get_params = {}
     102 + presense_strs: List[str] = []
     103 + absence_strs: List[str] = []
     104 + stats: Dict[str, Any] = {}
    99 105   
    100  - self.presense_strs = []
    101  - self.absence_strs = []
    102  - self.stats = {}
     106 + engine = None
     107 + engine_data: Dict[str, Any] = {}
     108 + engine_obj: Optional["MaigretEngine"] = None
     109 + request_future = None
     110 + alexa_rank = None
     111 + source = None
    103 112   
    104  - self.engine = None
    105  - self.engine_data = {}
    106  - self.engine_obj = None
    107  - self.request_future = None
    108  - self.alexa_rank = None
    109  - self.source = None
     113 + def __init__(self, name, information):
     114 + self.name = name
     115 + self.url_subpath = ""
    110 116   
    111 117   for k, v in information.items():
    112 118   self.__dict__[CaseConverter.camel_to_snake(k)] = v
    skipped 80 lines
    193 199   self.url_regexp = None
    194 200   
    195 201   self_copy = copy.deepcopy(self)
    196  - engine_data = self_copy.engine_obj.site
     202 + engine_data = self_copy.engine_obj and self_copy.engine_obj.site or {}
    197 203   site_data_keys = list(self_copy.__dict__.keys())
    198 204   
    199 205   for k in engine_data.keys():
    skipped 247 lines
  • ■ ■ ■ ■ ■
    maigret/types.py
    1  -from typing import Callable, Any, Tuple
     1 +from typing import Callable, List, Dict, Tuple, Any
    2 2   
    3 3   
    4 4  # search query
    5  -QueryDraft = Tuple[Callable, Any, Any]
     5 +QueryDraft = Tuple[Callable, List, Dict]
     6 + 
     7 +# options dict
     8 +QueryOptions = Dict[str, Any]
     9 + 
     10 +# TODO: throw out
     11 +QueryResultWrapper = Dict[str, Any]
    6 12   
  • sites.md
    Diff is too large to be displayed.
  • tests/conftest.py
    skipped 25 lines
    26 26   
    27 27  def remove_test_reports():
    28 28   reports_list = get_test_reports_filenames()
    29  - for f in reports_list: os.remove(f)
     29 + for f in reports_list:
     30 + os.remove(f)
    30 31   logging.error(f'Removed test reports {reports_list}')
    31 32   
    32 33   
    skipped 13 lines
  • tests/test_activation.py
    skipped 43 lines
    44 44   
    45 45   url = 'https://httpbin.org/cookies'
    46 46   connector = aiohttp.TCPConnector(ssl=False)
    47  - session = aiohttp.ClientSession(connector=connector, trust_env=True,
    48  - cookie_jar=cookie_jar)
     47 + session = aiohttp.ClientSession(
     48 + connector=connector, trust_env=True, cookie_jar=cookie_jar
     49 + )
    49 50   
    50 51   response = await session.get(url=url)
    51 52   result = json.loads(await response.content.read())
    skipped 4 lines
  • tests/test_executors.py
    skipped 1 lines
    2 2  import pytest
    3 3  import asyncio
    4 4  import logging
    5  -from maigret.executors import AsyncioSimpleExecutor, AsyncioProgressbarExecutor, \
    6  - AsyncioProgressbarSemaphoreExecutor, AsyncioProgressbarQueueExecutor
     5 +from maigret.executors import (
     6 + AsyncioSimpleExecutor,
     7 + AsyncioProgressbarExecutor,
     8 + AsyncioProgressbarSemaphoreExecutor,
     9 + AsyncioProgressbarQueueExecutor,
     10 +)
    7 11   
    8 12  logger = logging.getLogger(__name__)
     13 + 
    9 14   
    10 15  async def func(n):
    11 16   await asyncio.sleep(0.1 * (n % 3))
    skipped 7 lines
    19 24   assert await executor.run(tasks) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    20 25   assert executor.execution_time > 0.2
    21 26   assert executor.execution_time < 0.3
     27 + 
    22 28   
    23 29  @pytest.mark.asyncio
    24 30  async def test_asyncio_progressbar_executor():
    skipped 40 lines
    65 71   assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
    66 72   assert executor.execution_time > 0.2
    67 73   assert executor.execution_time < 0.3
     74 + 
  • tests/test_maigret.py
    skipped 7 lines
    8 8  from maigret.sites import MaigretDatabase
    9 9   
    10 10  EXAMPLE_DB = {
    11  - 'engines': {
    12  - },
     11 + 'engines': {},
    13 12   'sites': {
    14 13   "GooglePlayStore": {
    15  - "tags": [
    16  - "global",
    17  - "us"
    18  - ],
     14 + "tags": ["global", "us"],
    19 15   "disabled": False,
    20 16   "checkType": "status_code",
    21 17   "alexaRank": 1,
    22 18   "url": "https://play.google.com/store/apps/developer?id={username}",
    23 19   "urlMain": "https://play.google.com/store",
    24 20   "usernameClaimed": "Facebook_nosuchname",
    25  - "usernameUnclaimed": "noonewouldeverusethis7"
     21 + "usernameUnclaimed": "noonewouldeverusethis7",
    26 22   },
    27 23   "Reddit": {
    28  - "tags": [
    29  - "news",
    30  - "social",
    31  - "us"
    32  - ],
     24 + "tags": ["news", "social", "us"],
    33 25   "checkType": "status_code",
    34  - "presenseStrs": [
    35  - "totalKarma"
    36  - ],
     26 + "presenseStrs": ["totalKarma"],
    37 27   "disabled": True,
    38 28   "alexaRank": 17,
    39 29   "url": "https://www.reddit.com/user/{username}",
    40 30   "urlMain": "https://www.reddit.com/",
    41 31   "usernameClaimed": "blue",
    42  - "usernameUnclaimed": "noonewouldeverusethis7"
     32 + "usernameUnclaimed": "noonewouldeverusethis7",
    43 33   },
    44  - }
     34 + },
    45 35  }
    46 36   
    47 37   
    skipped 60 lines
  • tests/test_report.py
    skipped 6 lines
    7 7  import xmind
    8 8  from jinja2 import Template
    9 9   
    10  -from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \
    11  - save_pdf_report, generate_report_template, generate_report_context, generate_json_report
     10 +from maigret.report import (
     11 + generate_csv_report,
     12 + generate_txt_report,
     13 + save_xmind_report,
     14 + save_html_report,
     15 + save_pdf_report,
     16 + generate_report_template,
     17 + generate_report_context,
     18 + generate_json_report,
     19 +)
    12 20  from maigret.result import QueryResult, QueryStatus
    13 21   
    14 22  EXAMPLE_RESULTS = {
    skipped 2 lines
    17 25   'parsing_enabled': True,
    18 26   'url_main': 'https://www.github.com/',
    19 27   'url_user': 'https://www.github.com/test',
    20  - 'status': QueryResult('test',
    21  - 'GitHub',
    22  - 'https://www.github.com/test',
    23  - QueryStatus.CLAIMED,
    24  - tags=['test_tag']),
     28 + 'status': QueryResult(
     29 + 'test',
     30 + 'GitHub',
     31 + 'https://www.github.com/test',
     32 + QueryStatus.CLAIMED,
     33 + tags=['test_tag'],
     34 + ),
    25 35   'http_status': 200,
    26 36   'is_similar': False,
    27  - 'rank': 78
     37 + 'rank': 78,
    28 38   }
    29 39  }
    30 40   
    skipped 2 lines
    33 43   
    34 44  GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
    35 45  GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
    36  -GOOD_500PX_RESULT.ids_data = {"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==", "legacy_id": "26403415",
    37  - "username": "alexaimephotographycars", "name": "Alex Aim\u00e9",
    38  - "website": "www.flickr.com/photos/alexaimephotography/",
    39  - "facebook_link": " www.instagram.com/street.reality.photography/",
    40  - "instagram_username": "alexaimephotography", "twitter_username": "Alexaimephotogr"}
     46 +GOOD_500PX_RESULT.ids_data = {
     47 + "uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==",
     48 + "legacy_id": "26403415",
     49 + "username": "alexaimephotographycars",
     50 + "name": "Alex Aim\u00e9",
     51 + "website": "www.flickr.com/photos/alexaimephotography/",
     52 + "facebook_link": " www.instagram.com/street.reality.photography/",
     53 + "instagram_username": "alexaimephotography",
     54 + "twitter_username": "Alexaimephotogr",
     55 +}
    41 56   
    42 57  GOOD_REDDIT_RESULT = copy.deepcopy(GOOD_RESULT)
    43 58  GOOD_REDDIT_RESULT.tags = ['news', 'us']
    44  -GOOD_REDDIT_RESULT.ids_data = {"reddit_id": "t5_1nytpy", "reddit_username": "alexaimephotography",
    45  - "fullname": "alexaimephotography",
    46  - "image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e",
    47  - "is_employee": "False", "is_nsfw": "False", "is_mod": "True", "is_following": "True",
    48  - "has_user_profile": "True", "hide_from_robots": "False",
    49  - "created_at": "2019-07-10 12:20:03", "total_karma": "53959", "post_karma": "52738"}
     59 +GOOD_REDDIT_RESULT.ids_data = {
     60 + "reddit_id": "t5_1nytpy",
     61 + "reddit_username": "alexaimephotography",
     62 + "fullname": "alexaimephotography",
     63 + "image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e",
     64 + "is_employee": "False",
     65 + "is_nsfw": "False",
     66 + "is_mod": "True",
     67 + "is_following": "True",
     68 + "has_user_profile": "True",
     69 + "hide_from_robots": "False",
     70 + "created_at": "2019-07-10 12:20:03",
     71 + "total_karma": "53959",
     72 + "post_karma": "52738",
     73 +}
    50 74   
    51 75  GOOD_IG_RESULT = copy.deepcopy(GOOD_RESULT)
    52 76  GOOD_IG_RESULT.tags = ['photo', 'global']
    53  -GOOD_IG_RESULT.ids_data = {"instagram_username": "alexaimephotography", "fullname": "Alexaimephotography",
    54  - "id": "6828488620",
    55  - "image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F",
    56  - "bio": "Photographer \nChild of fine street arts",
    57  - "external_url": "https://www.flickr.com/photos/alexaimephotography2020/"}
     77 +GOOD_IG_RESULT.ids_data = {
     78 + "instagram_username": "alexaimephotography",
     79 + "fullname": "Alexaimephotography",
     80 + "id": "6828488620",
     81 + "image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F",
     82 + "bio": "Photographer \nChild of fine street arts",
     83 + "external_url": "https://www.flickr.com/photos/alexaimephotography2020/",
     84 +}
    58 85   
    59 86  GOOD_TWITTER_RESULT = copy.deepcopy(GOOD_RESULT)
    60 87  GOOD_TWITTER_RESULT.tags = ['social', 'us']
    61 88   
    62  -TEST = [('alexaimephotographycars', 'username', {
    63  - '500px': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
    64  - 'url_user': 'https://500px.com/p/alexaimephotographycars',
    65  - 'ids_usernames': {'alexaimephotographycars': 'username', 'alexaimephotography': 'username',
    66  - 'Alexaimephotogr': 'username'}, 'status': GOOD_500PX_RESULT, 'http_status': 200,
    67  - 'is_similar': False, 'rank': 2981},
    68  - 'Reddit': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
    69  - 'url_user': 'https://www.reddit.com/user/alexaimephotographycars', 'status': BAD_RESULT,
    70  - 'http_status': 404, 'is_similar': False, 'rank': 17},
    71  - 'Twitter': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
    72  - 'url_user': 'https://twitter.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 400,
    73  - 'is_similar': False, 'rank': 55},
    74  - 'Instagram': {'username': 'alexaimephotographycars', 'parsing_enabled': True,
    75  - 'url_main': 'https://www.instagram.com/',
    76  - 'url_user': 'https://www.instagram.com/alexaimephotographycars', 'status': BAD_RESULT,
    77  - 'http_status': 404, 'is_similar': False, 'rank': 29}}), ('alexaimephotography', 'username', {
    78  - '500px': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
    79  - 'url_user': 'https://500px.com/p/alexaimephotography', 'status': BAD_RESULT, 'http_status': 200,
    80  - 'is_similar': False, 'rank': 2981},
    81  - 'Reddit': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
    82  - 'url_user': 'https://www.reddit.com/user/alexaimephotography',
    83  - 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_REDDIT_RESULT, 'http_status': 200,
    84  - 'is_similar': False, 'rank': 17},
    85  - 'Twitter': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
    86  - 'url_user': 'https://twitter.com/alexaimephotography', 'status': BAD_RESULT, 'http_status': 400,
    87  - 'is_similar': False, 'rank': 55},
    88  - 'Instagram': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/',
    89  - 'url_user': 'https://www.instagram.com/alexaimephotography',
    90  - 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_IG_RESULT, 'http_status': 200,
    91  - 'is_similar': False, 'rank': 29}}), ('Alexaimephotogr', 'username', {
    92  - '500px': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
    93  - 'url_user': 'https://500px.com/p/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 200,
    94  - 'is_similar': False, 'rank': 2981},
    95  - 'Reddit': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
    96  - 'url_user': 'https://www.reddit.com/user/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404,
    97  - 'is_similar': False, 'rank': 17},
    98  - 'Twitter': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
    99  - 'url_user': 'https://twitter.com/Alexaimephotogr', 'status': GOOD_TWITTER_RESULT, 'http_status': 400,
    100  - 'is_similar': False, 'rank': 55},
    101  - 'Instagram': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/',
    102  - 'url_user': 'https://www.instagram.com/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404,
    103  - 'is_similar': False, 'rank': 29}})]
     89 +TEST = [
     90 + (
     91 + 'alexaimephotographycars',
     92 + 'username',
     93 + {
     94 + '500px': {
     95 + 'username': 'alexaimephotographycars',
     96 + 'parsing_enabled': True,
     97 + 'url_main': 'https://500px.com/',
     98 + 'url_user': 'https://500px.com/p/alexaimephotographycars',
     99 + 'ids_usernames': {
     100 + 'alexaimephotographycars': 'username',
     101 + 'alexaimephotography': 'username',
     102 + 'Alexaimephotogr': 'username',
     103 + },
     104 + 'status': GOOD_500PX_RESULT,
     105 + 'http_status': 200,
     106 + 'is_similar': False,
     107 + 'rank': 2981,
     108 + },
     109 + 'Reddit': {
     110 + 'username': 'alexaimephotographycars',
     111 + 'parsing_enabled': True,
     112 + 'url_main': 'https://www.reddit.com/',
     113 + 'url_user': 'https://www.reddit.com/user/alexaimephotographycars',
     114 + 'status': BAD_RESULT,
     115 + 'http_status': 404,
     116 + 'is_similar': False,
     117 + 'rank': 17,
     118 + },
     119 + 'Twitter': {
     120 + 'username': 'alexaimephotographycars',
     121 + 'parsing_enabled': True,
     122 + 'url_main': 'https://www.twitter.com/',
     123 + 'url_user': 'https://twitter.com/alexaimephotographycars',
     124 + 'status': BAD_RESULT,
     125 + 'http_status': 400,
     126 + 'is_similar': False,
     127 + 'rank': 55,
     128 + },
     129 + 'Instagram': {
     130 + 'username': 'alexaimephotographycars',
     131 + 'parsing_enabled': True,
     132 + 'url_main': 'https://www.instagram.com/',
     133 + 'url_user': 'https://www.instagram.com/alexaimephotographycars',
     134 + 'status': BAD_RESULT,
     135 + 'http_status': 404,
     136 + 'is_similar': False,
     137 + 'rank': 29,
     138 + },
     139 + },
     140 + ),
     141 + (
     142 + 'alexaimephotography',
     143 + 'username',
     144 + {
     145 + '500px': {
     146 + 'username': 'alexaimephotography',
     147 + 'parsing_enabled': True,
     148 + 'url_main': 'https://500px.com/',
     149 + 'url_user': 'https://500px.com/p/alexaimephotography',
     150 + 'status': BAD_RESULT,
     151 + 'http_status': 200,
     152 + 'is_similar': False,
     153 + 'rank': 2981,
     154 + },
     155 + 'Reddit': {
     156 + 'username': 'alexaimephotography',
     157 + 'parsing_enabled': True,
     158 + 'url_main': 'https://www.reddit.com/',
     159 + 'url_user': 'https://www.reddit.com/user/alexaimephotography',
     160 + 'ids_usernames': {'alexaimephotography': 'username'},
     161 + 'status': GOOD_REDDIT_RESULT,
     162 + 'http_status': 200,
     163 + 'is_similar': False,
     164 + 'rank': 17,
     165 + },
     166 + 'Twitter': {
     167 + 'username': 'alexaimephotography',
     168 + 'parsing_enabled': True,
     169 + 'url_main': 'https://www.twitter.com/',
     170 + 'url_user': 'https://twitter.com/alexaimephotography',
     171 + 'status': BAD_RESULT,
     172 + 'http_status': 400,
     173 + 'is_similar': False,
     174 + 'rank': 55,
     175 + },
     176 + 'Instagram': {
     177 + 'username': 'alexaimephotography',
     178 + 'parsing_enabled': True,
     179 + 'url_main': 'https://www.instagram.com/',
     180 + 'url_user': 'https://www.instagram.com/alexaimephotography',
     181 + 'ids_usernames': {'alexaimephotography': 'username'},
     182 + 'status': GOOD_IG_RESULT,
     183 + 'http_status': 200,
     184 + 'is_similar': False,
     185 + 'rank': 29,
     186 + },
     187 + },
     188 + ),
     189 + (
     190 + 'Alexaimephotogr',
     191 + 'username',
     192 + {
     193 + '500px': {
     194 + 'username': 'Alexaimephotogr',
     195 + 'parsing_enabled': True,
     196 + 'url_main': 'https://500px.com/',
     197 + 'url_user': 'https://500px.com/p/Alexaimephotogr',
     198 + 'status': BAD_RESULT,
     199 + 'http_status': 200,
     200 + 'is_similar': False,
     201 + 'rank': 2981,
     202 + },
     203 + 'Reddit': {
     204 + 'username': 'Alexaimephotogr',
     205 + 'parsing_enabled': True,
     206 + 'url_main': 'https://www.reddit.com/',
     207 + 'url_user': 'https://www.reddit.com/user/Alexaimephotogr',
     208 + 'status': BAD_RESULT,
     209 + 'http_status': 404,
     210 + 'is_similar': False,
     211 + 'rank': 17,
     212 + },
     213 + 'Twitter': {
     214 + 'username': 'Alexaimephotogr',
     215 + 'parsing_enabled': True,
     216 + 'url_main': 'https://www.twitter.com/',
     217 + 'url_user': 'https://twitter.com/Alexaimephotogr',
     218 + 'status': GOOD_TWITTER_RESULT,
     219 + 'http_status': 400,
     220 + 'is_similar': False,
     221 + 'rank': 55,
     222 + },
     223 + 'Instagram': {
     224 + 'username': 'Alexaimephotogr',
     225 + 'parsing_enabled': True,
     226 + 'url_main': 'https://www.instagram.com/',
     227 + 'url_user': 'https://www.instagram.com/Alexaimephotogr',
     228 + 'status': BAD_RESULT,
     229 + 'http_status': 404,
     230 + 'is_similar': False,
     231 + 'rank': 29,
     232 + },
     233 + },
     234 + ),
     235 +]
    104 236   
    105 237  SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
    106 238   
    skipped 80 lines
    187 319   assert data['topic']['topics'][0]['title'] == 'Undefined'
    188 320   assert data['topic']['topics'][1]['title'] == 'test_tag'
    189 321   assert len(data['topic']['topics'][1]['topics']) == 1
    190  - assert data['topic']['topics'][1]['topics'][0]['label'] == 'https://www.github.com/test'
     322 + assert (
     323 + data['topic']['topics'][1]['topics'][0]['label']
     324 + == 'https://www.github.com/test'
     325 + )
    191 326   
    192 327   
    193 328  def test_html_report():
    skipped 18 lines
  • tests/test_sites.py
    skipped 9 lines
    10 10   "The specified member cannot be found. Please enter a member's entire name.",
    11 11   ],
    12 12   "checkType": "message",
    13  - "errors": {
    14  - "You must be logged-in to do that.": "Login required"
    15  - },
    16  - "url": "{urlMain}{urlSubpath}/members/?username={username}"
    17  - }
     13 + "errors": {"You must be logged-in to do that.": "Login required"},
     14 + "url": "{urlMain}{urlSubpath}/members/?username={username}",
     15 + },
    18 16   },
    19 17   },
    20 18   'sites': {
    21 19   "Amperka": {
    22 20   "engine": "XenForo",
    23 21   "rank": 121613,
    24  - "tags": [
    25  - "ru"
    26  - ],
     22 + "tags": ["ru"],
    27 23   "urlMain": "http://forum.amperka.ru",
    28 24   "usernameClaimed": "adam",
    29  - "usernameUnclaimed": "noonewouldeverusethis7"
     25 + "usernameUnclaimed": "noonewouldeverusethis7",
    30 26   },
    31  - }
     27 + },
    32 28  }
    33 29   
    34 30   
    skipped 81 lines
    116 112   db = MaigretDatabase()
    117 113   db.load_from_json(EXAMPLE_DB)
    118 114   
    119  - assert db.sites[0].url_regexp.pattern == r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
    120  - assert db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test') == 'test'
     115 + assert (
     116 + db.sites[0].url_regexp.pattern
     117 + == r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
     118 + )
     119 + assert (
     120 + db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
     121 + == 'test'
     122 + )
    121 123   
    122 124   
    123 125  def test_ranked_sites_dict():
    skipped 56 lines
  • tests/test_utils.py
    skipped 1 lines
    2 2  import itertools
    3 3  import re
    4 4   
    5  -from maigret.utils import CaseConverter, is_country_tag, enrich_link_str, URLMatcher, get_dict_ascii_tree
     5 +from maigret.utils import (
     6 + CaseConverter,
     7 + is_country_tag,
     8 + enrich_link_str,
     9 + URLMatcher,
     10 + get_dict_ascii_tree,
     11 +)
    6 12   
    7 13   
    8 14  def test_case_convert_camel_to_snake():
    skipped 36 lines
    45 51   
    46 52  def test_enrich_link_str():
    47 53   assert enrich_link_str('test') == 'test'
    48  - assert enrich_link_str(
    49  - ' www.flickr.com/photos/alexaimephotography/') == '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'
     54 + assert (
     55 + enrich_link_str(' www.flickr.com/photos/alexaimephotography/')
     56 + == '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'
     57 + )
    50 58   
    51 59   
    52 60  def test_url_extract_main_part():
    skipped 25 lines
    78 86   
    79 87   for url_parts in itertools.product(*parts):
    80 88   url = ''.join(url_parts)
    81  - assert URLMatcher.make_profile_url_regexp(url).pattern == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
     89 + assert (
     90 + URLMatcher.make_profile_url_regexp(url).pattern
     91 + == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
     92 + )
    82 93   
    83 94   
    84 95  def test_get_dict_ascii_tree():
    85  - data = {'uid': 'dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==', 'legacy_id': '26403415', 'username': 'alexaimephotographycars', 'name': 'Alex Aimé', 'created_at': '2018-05-04T10:17:01.000+0000', 'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b', 'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201', 'website': 'www.instagram.com/street.reality.photography/', 'facebook_link': ' www.instagram.com/street.reality.photography/', 'instagram_username': 'Street.Reality.Photography', 'twitter_username': 'Alexaimephotogr'}
     96 + data = {
     97 + 'uid': 'dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==',
     98 + 'legacy_id': '26403415',
     99 + 'username': 'alexaimephotographycars',
     100 + 'name': 'Alex Aimé',
     101 + 'created_at': '2018-05-04T10:17:01.000+0000',
     102 + 'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b',
     103 + 'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201',
     104 + 'website': 'www.instagram.com/street.reality.photography/',
     105 + 'facebook_link': ' www.instagram.com/street.reality.photography/',
     106 + 'instagram_username': 'Street.Reality.Photography',
     107 + 'twitter_username': 'Alexaimephotogr',
     108 + }
    86 109   
    87 110   ascii_tree = get_dict_ascii_tree(data.items())
    88 111   
    89  - assert ascii_tree == """
     112 + assert (
     113 + ascii_tree
     114 + == """
    90 115  ┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
    91 116  ┣╸legacy_id: 26403415
    92 117  ┣╸username: alexaimephotographycars
    skipped 5 lines
    98 123  ┣╸facebook_link: www.instagram.com/street.reality.photography/
    99 124  ┣╸instagram_username: Street.Reality.Photography
    100 125  ┗╸twitter_username: Alexaimephotogr"""
     126 + )
     127 + 