  • Refactoring and linting; added notifications about frequent search errors

  • Soxoj committed 3 years ago
    bfa6afac
    1 parent bfaf276f
  • format.sh
     1 +#!/bin/sh
     2 +FILES="maigret wizard.py maigret.py"
     3 + 
     4 +echo 'black'
     5 +black --skip-string-normalization $FILES
  • lint.sh
     1 +#!/bin/sh
     2 +FILES="maigret wizard.py maigret.py"
     3 + 
     4 +echo 'syntax errors or undefined names'
     5 +flake8 --count --select=E9,F63,F7,F82 --show-source --statistics $FILES
     6 + 
     7 +echo 'warning'
     8 +flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503 $FILES
     9 + 
     10 +echo 'mypy'
     11 +mypy ./maigret
  • maigret/__init__.py
    skipped 2 lines
    3 3  from .checking import maigret as search
    4 4  from .sites import MaigretEngine, MaigretSite, MaigretDatabase
    5 5  from .notify import QueryNotifyPrint as Notifier
     6 + 
  • maigret/activation.py
    skipped 8 lines
    9 9   @staticmethod
    10 10   def twitter(site, logger, cookies={}):
    11 11   headers = dict(site.headers)
    12  - del headers['x-guest-token']
    13  - r = requests.post(site.activation['url'], headers=headers)
     12 + del headers["x-guest-token"]
     13 + r = requests.post(site.activation["url"], headers=headers)
    14 14   logger.info(r)
    15 15   j = r.json()
    16  - guest_token = j[site.activation['src']]
    17  - site.headers['x-guest-token'] = guest_token
     16 + guest_token = j[site.activation["src"]]
     17 + site.headers["x-guest-token"] = guest_token
    18 18   
    19 19   @staticmethod
    20 20   def vimeo(site, logger, cookies={}):
    21 21   headers = dict(site.headers)
    22  - if 'Authorization' in headers:
    23  - del headers['Authorization']
    24  - r = requests.get(site.activation['url'], headers=headers)
    25  - jwt_token = r.json()['jwt']
    26  - site.headers['Authorization'] = 'jwt ' + jwt_token
     22 + if "Authorization" in headers:
     23 + del headers["Authorization"]
     24 + r = requests.get(site.activation["url"], headers=headers)
     25 + jwt_token = r.json()["jwt"]
     26 + site.headers["Authorization"] = "jwt " + jwt_token
    27 27   
    28 28   @staticmethod
    29 29   def spotify(site, logger, cookies={}):
    30 30   headers = dict(site.headers)
    31  - if 'Authorization' in headers:
    32  - del headers['Authorization']
    33  - r = requests.get(site.activation['url'])
    34  - bearer_token = r.json()['accessToken']
    35  - site.headers['authorization'] = f'Bearer {bearer_token}'
     31 + if "Authorization" in headers:
     32 + del headers["Authorization"]
     33 + r = requests.get(site.activation["url"])
     34 + bearer_token = r.json()["accessToken"]
     35 + site.headers["authorization"] = f"Bearer {bearer_token}"
    36 36   
    37 37   @staticmethod
    38 38   def xssis(site, logger, cookies={}):
    39 39   if not cookies:
    40  - logger.debug('You must have cookies to activate xss.is parsing!')
     40 + logger.debug("You must have cookies to activate xss.is parsing!")
    41 41   return
    42 42   
    43 43   headers = dict(site.headers)
    44 44   post_data = {
    45  - '_xfResponseType': 'json',
    46  - '_xfToken': '1611177919,a2710362e45dad9aa1da381e21941a38'
     45 + "_xfResponseType": "json",
     46 + "_xfToken": "1611177919,a2710362e45dad9aa1da381e21941a38",
    47 47   }
    48  - headers['content-type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
    49  - r = requests.post(site.activation['url'], headers=headers, cookies=cookies, data=post_data)
    50  - csrf = r.json()['csrf']
    51  - site.get_params['_xfToken'] = csrf
     48 + headers["content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"
     49 + r = requests.post(
     50 + site.activation["url"], headers=headers, cookies=cookies, data=post_data
     51 + )
     52 + csrf = r.json()["csrf"]
     53 + site.get_params["_xfToken"] = csrf
    52 54   
    53 55   
    54 56  async def import_aiohttp_cookies(cookiestxt_filename):
    skipped 7 lines
    62 64   for key, cookie in list(domain.values())[0].items():
    63 65   c = Morsel()
    64 66   c.set(key, cookie.value, cookie.value)
    65  - c['domain'] = cookie.domain
    66  - c['path'] = cookie.path
     67 + c["domain"] = cookie.domain
     68 + c["path"] = cookie.path
    67 69   cookies_list.append((key, c))
    68 70   
    69 71   cookies.update_cookies(cookies_list)
    skipped 3 lines
  • maigret/checking.py
    skipped 4 lines
    5 5  import ssl
    6 6  import sys
    7 7  import tqdm
    8  -import time
     8 +from typing import Tuple, Optional
    9 9   
    10 10  import aiohttp
    11 11  import tqdm.asyncio
    12 12  from aiohttp_socks import ProxyConnector
    13  -from mock import Mock
    14 13  from python_socks import _errors as proxy_errors
    15 14  from socid_extractor import extract
    16 15   
    17 16  from .activation import ParsingActivator, import_aiohttp_cookies
     17 +from . import errors
     18 +from .errors import CheckError
    18 19  from .executors import AsyncioSimpleExecutor, AsyncioProgressbarQueueExecutor
    19 20  from .result import QueryResult, QueryStatus
    20 21  from .sites import MaigretDatabase, MaigretSite
    21  -from .types import CheckError
    22 22  from .utils import get_random_user_agent
    23 23   
    24 24   
    25 25  supported_recursive_search_ids = (
    26  - 'yandex_public_id',
    27  - 'gaia_id',
    28  - 'vk_id',
    29  - 'ok_id',
    30  - 'wikimapia_uid',
    31  - 'steam_id',
    32  - 'uidme_uguid',
     26 + "yandex_public_id",
     27 + "gaia_id",
     28 + "vk_id",
     29 + "ok_id",
     30 + "wikimapia_uid",
     31 + "steam_id",
     32 + "uidme_uguid",
    33 33  )
    34 34   
    35  -common_errors = {
    36  - '<title>Attention Required! | Cloudflare</title>': CheckError('Captcha', 'Cloudflare'),
    37  - 'Please stand by, while we are checking your browser': CheckError('Bot protection', 'Cloudflare'),
    38  - '<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
    39  - 'document.getElementById(\'validate_form_submit\').disabled=true': CheckError('Captcha', 'Mail.ru'),
    40  - 'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': CheckError('Bot protection', 'Blazingfast'),
    41  - '404</h1><p class="error-card__description">Мы&nbsp;не&nbsp;нашли страницу': CheckError('Resolving', 'MegaFon 404 page'),
    42  - 'Доступ к информационному ресурсу ограничен на основании Федерального закона': CheckError('Censorship', 'MGTS'),
    43  - 'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
    44  -}
     35 +unsupported_characters = "#"
    45 36   
    46  -unsupported_characters = '#'
    47 37   
    48  - 
    49  -async def get_response(request_future, site_name, logger) -> (str, int, CheckError):
     38 +async def get_response(
     39 + request_future, site_name, logger
     40 +) -> Tuple[str, int, Optional[CheckError]]:
    50 41   html_text = None
    51 42   status_code = 0
    52  - error = CheckError('Error')
     43 + error: Optional[CheckError] = CheckError("Error")
    53 44   
    54 45   try:
    55 46   response = await request_future
    56 47   
    57 48   status_code = response.status
    58 49   response_content = await response.content.read()
    59  - charset = response.charset or 'utf-8'
    60  - decoded_content = response_content.decode(charset, 'ignore')
     50 + charset = response.charset or "utf-8"
     51 + decoded_content = response_content.decode(charset, "ignore")
    61 52   html_text = decoded_content
    62 53   
    63 54   if status_code == 0:
    64  - error = CheckError('Connection lost')
     55 + error = CheckError("Connection lost")
    65 56   else:
    66 57   error = None
    67 58   
    68 59   logger.debug(html_text)
    69 60   
    70 61   except asyncio.TimeoutError as e:
    71  - error = CheckError('Request timeout', str(e))
     62 + error = CheckError("Request timeout", str(e))
    72 63   except aiohttp.client_exceptions.ClientConnectorError as e:
    73  - error = CheckError('Connecting failure', str(e))
     64 + error = CheckError("Connecting failure", str(e))
    74 65   except aiohttp.http_exceptions.BadHttpMessage as e:
    75  - error = CheckError('HTTP', str(e))
     66 + error = CheckError("HTTP", str(e))
    76 67   except proxy_errors.ProxyError as e:
    77  - error = CheckError('Proxy', str(e))
     68 + error = CheckError("Proxy", str(e))
     69 + except KeyboardInterrupt:
     70 + error = CheckError("Interrupted")
    78 71   except Exception as e:
    79 72   # python-specific exceptions
    80 73   if sys.version_info.minor > 6:
    81  - if isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError):
    82  - error = CheckError('SSL', str(e))
     74 + if isinstance(e, ssl.SSLCertVerificationError) or isinstance(
     75 + e, ssl.SSLError
     76 + ):
     77 + error = CheckError("SSL", str(e))
    83 78   else:
    84  - logger.warning(f'Unhandled error while requesting {site_name}: {e}')
     79 + logger.warning(f"Unhandled error while requesting {site_name}: {e}")
    85 80   logger.debug(e, exc_info=True)
    86  - error = CheckError('Error', str(e))
     81 + error = CheckError("Error", str(e))
    87 82   
    88 83   # TODO: return only needed information
    89  - return html_text, status_code, error
     84 + return str(html_text), status_code, error
    90 85   
    91 86   
    92  -async def update_site_dict_from_response(sitename, site_dict, results_info, logger, query_notify):
     87 +async def update_site_dict_from_response(
     88 + sitename, site_dict, results_info, logger, query_notify
     89 +):
    93 90   site_obj = site_dict[sitename]
    94 91   future = site_obj.request_future
    95 92   if not future:
    96 93   # ignore: search by incompatible id type
    97 94   return
    98 95   
    99  - response = await get_response(request_future=future,
    100  - site_name=sitename,
    101  - logger=logger)
     96 + response = await get_response(
     97 + request_future=future, site_name=sitename, logger=logger
     98 + )
    102 99   
    103  - return sitename, process_site_result(response, query_notify, logger, results_info, site_obj)
     100 + return sitename, process_site_result(
     101 + response, query_notify, logger, results_info, site_obj
     102 + )
    104 103   
    105 104   
    106 105  # TODO: move to separate class
    107  -def detect_error_page(html_text, status_code, fail_flags, ignore_403) -> CheckError:
     106 +def detect_error_page(
     107 + html_text, status_code, fail_flags, ignore_403
     108 +) -> Optional[CheckError]:
    108 109   # Detect service restrictions such as a country restriction
    109 110   for flag, msg in fail_flags.items():
    110 111   if flag in html_text:
    111  - return CheckError('Site-specific', msg)
     112 + return CheckError("Site-specific", msg)
    112 113   
    113 114   # Detect common restrictions such as provider censorship and bot protection
    114  - for flag, err in common_errors.items():
    115  - if flag in html_text:
    116  - return err
     115 + err = errors.detect(html_text)
     116 + if err:
     117 + return err
    117 118   
    118 119   # Detect common site errors
    119 120   if status_code == 403 and not ignore_403:
    120  - return CheckError('Access denied', '403 status code, use proxy/vpn')
     121 + return CheckError("Access denied", "403 status code, use proxy/vpn")
    121 122   
    122 123   elif status_code >= 500:
    123  - return CheckError(f'Server', f'{status_code} status code')
     124 + return CheckError("Server", f"{status_code} status code")
    124 125   
    125 126   return None
    126 127   
    127 128   
    128  -def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
     129 +def process_site_result(
     130 + response, query_notify, logger, results_info, site: MaigretSite
     131 +):
    129 132   if not response:
    130 133   return results_info
    131 134   
    132 135   fulltags = site.tags
    133 136   
    134 137   # Retrieve other site information again
    135  - username = results_info['username']
    136  - is_parsing_enabled = results_info['parsing_enabled']
     138 + username = results_info["username"]
     139 + is_parsing_enabled = results_info["parsing_enabled"]
    137 140   url = results_info.get("url_user")
    138 141   logger.debug(url)
    139 142   
    skipped 7 lines
    147 150   
    148 151   # TODO: refactor
    149 152   if not response:
    150  - logger.error(f'No response for {site.name}')
     153 + logger.error(f"No response for {site.name}")
    151 154   return results_info
    152 155   
    153 156   html_text, status_code, check_error = response
    skipped 2 lines
    156 159   response_time = None
    157 160   
    158 161   if logger.level == logging.DEBUG:
    159  - with open('debug.txt', 'a') as f:
    160  - status = status_code or 'No response'
    161  - f.write(f'url: {url}\nerror: {check_error}\nr: {status}\n')
     162 + with open("debug.txt", "a") as f:
     163 + status = status_code or "No response"
     164 + f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
    162 165   if html_text:
    163  - f.write(f'code: {status}\nresponse: {str(html_text)}\n')
     166 + f.write(f"code: {status}\nresponse: {str(html_text)}\n")
    164 167   
    165 168   # additional check for errors
    166 169   if status_code and not check_error:
    167  - check_error = detect_error_page(html_text, status_code, site.errors, site.ignore403)
     170 + check_error = detect_error_page(
     171 + html_text, status_code, site.errors, site.ignore403
     172 + )
    168 173   
    169 174   if site.activation and html_text:
    170  - is_need_activation = any([s for s in site.activation['marks'] if s in html_text])
     175 + is_need_activation = any(
     176 + [s for s in site.activation["marks"] if s in html_text]
     177 + )
    171 178   if is_need_activation:
    172  - method = site.activation['method']
     179 + method = site.activation["method"]
    173 180   try:
    174 181   activate_fun = getattr(ParsingActivator(), method)
    175 182   # TODO: async call
    176 183   activate_fun(site, logger)
    177 184   except AttributeError:
    178  - logger.warning(f'Activation method {method} for site {site.name} not found!')
     185 + logger.warning(
     186 + f"Activation method {method} for site {site.name} not found!"
     187 + )
    179 188   except Exception as e:
    180  - logger.warning(f'Failed activation {method} for site {site.name}: {e}')
     189 + logger.warning(f"Failed activation {method} for site {site.name}: {e}")
    181 190   
    182 191   site_name = site.pretty_name
    183 192   # presense flags
    skipped 3 lines
    187 196   if html_text:
    188 197   if not presense_flags:
    189 198   is_presense_detected = True
    190  - site.stats['presense_flag'] = None
     199 + site.stats["presense_flag"] = None
    191 200   else:
    192 201   for presense_flag in presense_flags:
    193 202   if presense_flag in html_text:
    194 203   is_presense_detected = True
    195  - site.stats['presense_flag'] = presense_flag
     204 + site.stats["presense_flag"] = presense_flag
    196 205   logger.debug(presense_flag)
    197 206   break
    198 207   
    199 208   if check_error:
    200 209   logger.debug(check_error)
    201  - result = QueryResult(username,
    202  - site_name,
    203  - url,
    204  - QueryStatus.UNKNOWN,
    205  - query_time=response_time,
    206  - error=check_error,
    207  - context=str(CheckError), tags=fulltags)
     210 + result = QueryResult(
     211 + username,
     212 + site_name,
     213 + url,
     214 + QueryStatus.UNKNOWN,
     215 + query_time=response_time,
     216 + error=check_error,
     217 + context=str(CheckError),
     218 + tags=fulltags,
     219 + )
    208 220   elif check_type == "message":
    209 221   absence_flags = site.absence_strs
    210 222   is_absence_flags_list = isinstance(absence_flags, list)
    211  - absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
     223 + absence_flags_set = (
     224 + set(absence_flags) if is_absence_flags_list else {absence_flags}
     225 + )
    212 226   # Checks if the error message is in the HTML
    213  - is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
     227 + is_absence_detected = any(
     228 + [(absence_flag in html_text) for absence_flag in absence_flags_set]
     229 + )
    214 230   if not is_absence_detected and is_presense_detected:
    215  - result = QueryResult(username,
    216  - site_name,
    217  - url,
    218  - QueryStatus.CLAIMED,
    219  - query_time=response_time, tags=fulltags)
     231 + result = QueryResult(
     232 + username,
     233 + site_name,
     234 + url,
     235 + QueryStatus.CLAIMED,
     236 + query_time=response_time,
     237 + tags=fulltags,
     238 + )
    220 239   else:
    221  - result = QueryResult(username,
    222  - site_name,
    223  - url,
    224  - QueryStatus.AVAILABLE,
    225  - query_time=response_time, tags=fulltags)
     240 + result = QueryResult(
     241 + username,
     242 + site_name,
     243 + url,
     244 + QueryStatus.AVAILABLE,
     245 + query_time=response_time,
     246 + tags=fulltags,
     247 + )
    226 248   elif check_type == "status_code":
    227 249   # Checks if the status code of the response is 2XX
    228 250   if (not status_code >= 300 or status_code < 200) and is_presense_detected:
    229  - result = QueryResult(username,
    230  - site_name,
    231  - url,
    232  - QueryStatus.CLAIMED,
    233  - query_time=response_time, tags=fulltags)
     251 + result = QueryResult(
     252 + username,
     253 + site_name,
     254 + url,
     255 + QueryStatus.CLAIMED,
     256 + query_time=response_time,
     257 + tags=fulltags,
     258 + )
    234 259   else:
    235  - result = QueryResult(username,
    236  - site_name,
    237  - url,
    238  - QueryStatus.AVAILABLE,
    239  - query_time=response_time, tags=fulltags)
     260 + result = QueryResult(
     261 + username,
     262 + site_name,
     263 + url,
     264 + QueryStatus.AVAILABLE,
     265 + query_time=response_time,
     266 + tags=fulltags,
     267 + )
    240 268   elif check_type == "response_url":
    241 269   # For this detection method, we have turned off the redirect.
    242 270   # So, there is no need to check the response URL: it will always
    skipped 1 lines
    244 272   # code indicates that the request was successful (i.e. no 404, or
    245 273   # forward to some odd redirect).
    246 274   if 200 <= status_code < 300 and is_presense_detected:
    247  - result = QueryResult(username,
    248  - site_name,
    249  - url,
    250  - QueryStatus.CLAIMED,
    251  - query_time=response_time, tags=fulltags)
     275 + result = QueryResult(
     276 + username,
     277 + site_name,
     278 + url,
     279 + QueryStatus.CLAIMED,
     280 + query_time=response_time,
     281 + tags=fulltags,
     282 + )
    252 283   else:
    253  - result = QueryResult(username,
    254  - site_name,
    255  - url,
    256  - QueryStatus.AVAILABLE,
    257  - query_time=response_time, tags=fulltags)
     284 + result = QueryResult(
     285 + username,
     286 + site_name,
     287 + url,
     288 + QueryStatus.AVAILABLE,
     289 + query_time=response_time,
     290 + tags=fulltags,
     291 + )
    258 292   else:
    259 293   # It should be impossible to ever get here...
    260  - raise ValueError(f"Unknown check type '{check_type}' for "
    261  - f"site '{site.name}'")
     294 + raise ValueError(
     295 + f"Unknown check type '{check_type}' for " f"site '{site.name}'"
     296 + )
    262 297   
    263 298   extracted_ids_data = {}
    264 299   
    skipped 1 lines
    266 301   try:
    267 302   extracted_ids_data = extract(html_text)
    268 303   except Exception as e:
    269  - logger.warning(f'Error while parsing {site.name}: {e}', exc_info=True)
     304 + logger.warning(f"Error while parsing {site.name}: {e}", exc_info=True)
    270 305   
    271 306   if extracted_ids_data:
    272 307   new_usernames = {}
    273 308   for k, v in extracted_ids_data.items():
    274  - if 'username' in k:
    275  - new_usernames[v] = 'username'
     309 + if "username" in k:
     310 + new_usernames[v] = "username"
    276 311   if k in supported_recursive_search_ids:
    277 312   new_usernames[v] = k
    278 313   
    279  - results_info['ids_usernames'] = new_usernames
    280  - results_info['ids_links'] = eval(extracted_ids_data.get('links', '[]'))
     314 + results_info["ids_usernames"] = new_usernames
     315 + results_info["ids_links"] = eval(extracted_ids_data.get("links", "[]"))
    281 316   result.ids_data = extracted_ids_data
    282 317   
    283 318   # Notify caller about results of query.
    284 319   query_notify.update(result, site.similar_search)
    285 320   
    286 321   # Save status of request
    287  - results_info['status'] = result
     322 + results_info["status"] = result
    288 323   
    289 324   # Save results from request
    290  - results_info['http_status'] = status_code
    291  - results_info['is_similar'] = site.similar_search
     325 + results_info["http_status"] = status_code
     326 + results_info["is_similar"] = site.similar_search
    292 327   # results_site['response_text'] = html_text
    293  - results_info['rank'] = site.alexa_rank
     328 + results_info["rank"] = site.alexa_rank
    294 329   return results_info
    295 330   
    296 331   
    297  -async def maigret(username, site_dict, logger, query_notify=None,
    298  - proxy=None, timeout=None, is_parsing_enabled=False,
    299  - id_type='username', debug=False, forced=False,
    300  - max_connections=100, no_progressbar=False,
    301  - cookies=None):
     332 +async def maigret(
     333 + username,
     334 + site_dict,
     335 + logger,
     336 + query_notify=None,
     337 + proxy=None,
     338 + timeout=None,
     339 + is_parsing_enabled=False,
     340 + id_type="username",
     341 + debug=False,
     342 + forced=False,
     343 + max_connections=100,
     344 + no_progressbar=False,
     345 + cookies=None,
     346 +):
    302 347   """Main search func
    303 348   
    304 349   Checks for existence of username on certain sites.
    skipped 37 lines
    342 387   query_notify.start(username, id_type)
    343 388   
    344 389   # TODO: connector
    345  - connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
     390 + connector = (
     391 + ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
     392 + )
    346 393   # connector = aiohttp.TCPConnector(ssl=False)
    347 394   connector.verify_ssl = False
    348 395   
    349 396   cookie_jar = None
    350 397   if cookies:
    351  - logger.debug(f'Using cookies jar file {cookies}')
     398 + logger.debug(f"Using cookies jar file {cookies}")
    352 399   cookie_jar = await import_aiohttp_cookies(cookies)
    353 400   
    354  - session = aiohttp.ClientSession(connector=connector, trust_env=True, cookie_jar=cookie_jar)
     401 + session = aiohttp.ClientSession(
     402 + connector=connector, trust_env=True, cookie_jar=cookie_jar
     403 + )
    355 404   
    356 405   if logger.level == logging.DEBUG:
    357  - future = session.get(url='https://icanhazip.com')
     406 + future = session.get(url="https://icanhazip.com")
    358 407   ip, status, check_error = await get_response(future, None, logger)
    359 408   if ip:
    360  - logger.debug(f'My IP is: {ip.strip()}')
     409 + logger.debug(f"My IP is: {ip.strip()}")
    361 410   else:
    362  - logger.debug(f'IP requesting {check_error[0]}: {check_error[1]}')
     411 + logger.debug(f"IP requesting {check_error[0]}: {check_error[1]}")
    363 412   
    364 413   # Results from analysis of all sites
    365 414   results_total = {}
    skipped 5 lines
    371 420   continue
    372 421   
    373 422   if site.disabled and not forced:
    374  - logger.debug(f'Site {site.name} is disabled, skipping...')
     423 + logger.debug(f"Site {site.name} is disabled, skipping...")
    375 424   continue
    376 425   
    377 426   # Results from analysis of this specific site
    378 427   results_site = {}
    379 428   
    380 429   # Record URL of main site and username
    381  - results_site['username'] = username
    382  - results_site['parsing_enabled'] = is_parsing_enabled
    383  - results_site['url_main'] = site.url_main
    384  - results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
     430 + results_site["username"] = username
     431 + results_site["parsing_enabled"] = is_parsing_enabled
     432 + results_site["url_main"] = site.url_main
     433 + results_site["cookies"] = (
     434 + cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
     435 + )
    385 436   
    386 437   headers = {
    387  - 'User-Agent': get_random_user_agent(),
     438 + "User-Agent": get_random_user_agent(),
    388 439   }
    389 440   
    390 441   headers.update(site.headers)
    391 442   
    392  - if 'url' not in site.__dict__:
    393  - logger.error('No URL for site %s', site.name)
     443 + if "url" not in site.__dict__:
     444 + logger.error("No URL for site %s", site.name)
    394 445   # URL of user on site (if it exists)
    395 446   url = site.url.format(
    396  - urlMain=site.url_main,
    397  - urlSubpath=site.url_subpath,
    398  - username=username
     447 + urlMain=site.url_main, urlSubpath=site.url_subpath, username=username
    399 448   )
    400 449   # workaround to prevent slash errors
    401  - url = re.sub('(?<!:)/+', '/', url)
     450 + url = re.sub("(?<!:)/+", "/", url)
    402 451   
    403 452   # Don't make request if username is invalid for the site
    404 453   if site.regex_check and re.search(site.regex_check, username) is None:
    405 454   # No need to do the check at the site: this user name is not allowed.
    406  - results_site['status'] = QueryResult(username,
    407  - site_name,
    408  - url,
    409  - QueryStatus.ILLEGAL)
     455 + results_site["status"] = QueryResult(
     456 + username, site_name, url, QueryStatus.ILLEGAL
     457 + )
    410 458   results_site["url_user"] = ""
    411  - results_site['http_status'] = ""
    412  - results_site['response_text'] = ""
    413  - query_notify.update(results_site['status'])
     459 + results_site["http_status"] = ""
     460 + results_site["response_text"] = ""
     461 + query_notify.update(results_site["status"])
    414 462   else:
    415 463   # URL of user on site (if it exists)
    416 464   results_site["url_user"] = url
    skipped 11 lines
    428 476   )
    429 477   
    430 478   for k, v in site.get_params.items():
    431  - url_probe += f'&{k}={v}'
     479 + url_probe += f"&{k}={v}"
    432 480   
    433  - if site.check_type == 'status_code' and site.request_head_only:
     481 + if site.check_type == "status_code" and site.request_head_only:
    434 482   # In most cases when we are detecting by status code,
    435 483   # it is not necessary to get the entire body: we can
    436 484   # detect fine with just the HEAD response.
    skipped 14 lines
    451 499   # The final result of the request will be what is available.
    452 500   allow_redirects = True
    453 501   
    454  - future = request_method(url=url_probe, headers=headers,
    455  - allow_redirects=allow_redirects,
    456  - timeout=timeout,
    457  - )
     502 + future = request_method(
     503 + url=url_probe,
     504 + headers=headers,
     505 + allow_redirects=allow_redirects,
     506 + timeout=timeout,
     507 + )
    458 508   
    459 509   # Store future in data for access later
    460 510   # TODO: move to separate obj
    skipped 4 lines
    465 515   
    466 516   coroutines = []
    467 517   for sitename, result_obj in results_total.items():
    468  - coroutines.append((update_site_dict_from_response, [sitename, site_dict, result_obj, logger, query_notify], {}))
     518 + coroutines.append(
     519 + (
     520 + update_site_dict_from_response,
     521 + [sitename, site_dict, result_obj, logger, query_notify],
     522 + {},
     523 + )
     524 + )
    469 525   
    470 526   if no_progressbar:
    471 527   executor = AsyncioSimpleExecutor(logger=logger)
    472 528   else:
    473  - executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=max_connections, timeout=timeout+0.5)
     529 + executor = AsyncioProgressbarQueueExecutor(
     530 + logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
     531 + )
    474 532   
    475 533   results = await executor.run(coroutines)
    476 534   
    477 535   await session.close()
    478 536   
    479  - # TODO: move to separate function
    480  - errors = {}
    481  - for el in results:
    482  - if not el:
    483  - continue
    484  - _, r = el
    485  - if r and isinstance(r, dict) and r.get('status'):
    486  - if not isinstance(r['status'], QueryResult):
    487  - continue
    488  - 
    489  - err = r['status'].error
    490  - if not err:
    491  - continue
    492  - errors[err.type] = errors.get(err.type, 0) + 1
    493  - 
    494  - for err, count in sorted(errors.items(), key=lambda x: x[1], reverse=True):
    495  - logger.warning(f'Errors of type "{err}": {count}')
    496  - 
    497 537   # Notify caller that all queries are finished.
    498 538   query_notify.finish()
    499 539   
    skipped 37 lines
    537 577   
    538 578  async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
    539 579   changes = {
    540  - 'disabled': False,
     580 + "disabled": False,
    541 581   }
    542 582   
    543 583   try:
    skipped 6 lines
    550 590   logger.error(site.__dict__)
    551 591   check_data = []
    552 592   
    553  - logger.info(f'Checking {site.name}...')
     593 + logger.info(f"Checking {site.name}...")
    554 594   
    555 595   for username, status in check_data:
    556 596   async with semaphore:
    skipped 11 lines
    568 608   # TODO: make normal checking
    569 609   if site.name not in results_dict:
    570 610   logger.info(results_dict)
    571  - changes['disabled'] = True
     611 + changes["disabled"] = True
    572 612   continue
    573 613   
    574  - result = results_dict[site.name]['status']
     614 + result = results_dict[site.name]["status"]
    575 615   
    576 616   site_status = result.status
    577 617   
    skipped 2 lines
    580 620   msgs = site.absence_strs
    581 621   etype = site.check_type
    582 622   logger.warning(
    583  - f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
     623 + f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}"
     624 + )
    584 625   # don't disable in case of available username
    585 626   if status == QueryStatus.CLAIMED:
    586  - changes['disabled'] = True
     627 + changes["disabled"] = True
    587 628   elif status == QueryStatus.CLAIMED:
    588  - logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
     629 + logger.warning(
     630 + f"Not found `{username}` in {site.name}, must be claimed"
     631 + )
    589 632   logger.info(results_dict[site.name])
    590  - changes['disabled'] = True
     633 + changes["disabled"] = True
    591 634   else:
    592  - logger.warning(f'Found `{username}` in {site.name}, must be available')
     635 + logger.warning(f"Found `{username}` in {site.name}, must be available")
    593 636   logger.info(results_dict[site.name])
    594  - changes['disabled'] = True
     637 + changes["disabled"] = True
    595 638   
    596  - logger.info(f'Site {site.name} checking is finished')
     639 + logger.info(f"Site {site.name} checking is finished")
    597 640   
    598  - if changes['disabled'] != site.disabled:
    599  - site.disabled = changes['disabled']
     641 + if changes["disabled"] != site.disabled:
     642 + site.disabled = changes["disabled"]
    600 643   db.update_site(site)
    601 644   if not silent:
    602  - action = 'Disabled' if site.disabled else 'Enabled'
    603  - print(f'{action} site {site.name}...')
     645 + action = "Disabled" if site.disabled else "Enabled"
     646 + print(f"{action} site {site.name}...")
    604 647   
    605 648   return changes
    606 649   
    607 650   
    608  -async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
    609  - max_connections=10) -> bool:
     651 +async def self_check(
     652 + db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10
     653 +) -> bool:
    610 654   sem = asyncio.Semaphore(max_connections)
    611 655   tasks = []
    612 656   all_sites = site_data
    skipped 15 lines
    628 672   total_disabled = disabled_new_count - disabled_old_count
    629 673   
    630 674   if total_disabled >= 0:
    631  - message = 'Disabled'
     675 + message = "Disabled"
    632 676   else:
    633  - message = 'Enabled'
     677 + message = "Enabled"
    634 678   total_disabled *= -1
    635 679   
    636 680   if not silent:
    637 681   print(
    638  - f'{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. Run with `--info` flag to get more information')
     682 + f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
     683 + "Run with `--info` flag to get more information"
     684 + )
    639 685   
    640 686   return total_disabled != 0
    641 687   
  • maigret/errors.py
     1 +from typing import Dict, List, Any
     2 + 
     3 +from .result import QueryResult
     4 + 
     5 + 
     6 +# error got as a result of completed search query
     7 +class CheckError:
     8 + _type = 'Unknown'
     9 + _desc = ''
     10 + 
     11 + def __init__(self, typename, desc=''):
     12 + self._type = typename
     13 + self._desc = desc
     14 + 
     15 + def __str__(self):
     16 + if not self._desc:
     17 + return f'{self._type} error'
     18 + 
     19 + return f'{self._type} error: {self._desc}'
     20 + 
     21 + @property
     22 + def type(self):
     23 + return self._type
     24 + 
     25 + @property
     26 + def desc(self):
     27 + return self._desc
     28 + 
     29 + 
     30 +COMMON_ERRORS = {
     31 + '<title>Attention Required! | Cloudflare</title>': CheckError(
     32 + 'Captcha', 'Cloudflare'
     33 + ),
     34 + 'Please stand by, while we are checking your browser': CheckError(
     35 + 'Bot protection', 'Cloudflare'
     36 + ),
     37 + '<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
     38 + 'document.getElementById(\'validate_form_submit\').disabled=true': CheckError(
     39 + 'Captcha', 'Mail.ru'
     40 + ),
     41 + 'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': CheckError(
     42 + 'Bot protection', 'Blazingfast'
     43 + ),
     44 + '404</h1><p class="error-card__description">Мы&nbsp;не&nbsp;нашли страницу': CheckError(
     45 + 'Resolving', 'MegaFon 404 page'
     46 + ),
     47 + 'Доступ к информационному ресурсу ограничен на основании Федерального закона': CheckError(
     48 + 'Censorship', 'MGTS'
     49 + ),
     50 + 'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
     51 +}
     52 + 
     53 +ERRORS_TYPES = {
     54 + 'Captcha': 'Try to switch to another IP address or to use service cookies',
     55 + 'Bot protection': 'Try to switch to another IP address',
     56 + 'Censorship': 'switch to another internet service provider',
     57 + 'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
     58 +}
     59 + 
     60 +THRESHOLD = 3 # percent
     61 + 
     62 + 
     63 +def is_important(err_data):
     64 + return err_data['perc'] >= THRESHOLD
     65 + 
     66 + 
     67 +def is_not_permanent(err_data):
     68 + return True
     69 + 
     70 + 
     71 +def detect(text):
     72 + for flag, err in COMMON_ERRORS.items():
     73 + if flag in text:
     74 + return err
     75 + return None
     76 + 
     77 + 
     78 +def solution_of(err_type) -> str:
     79 + return ERRORS_TYPES.get(err_type, '')
     80 + 
     81 + 
     82 +def extract_and_group(search_res: dict) -> List[Dict[str, Any]]:
     83 + errors_counts: Dict[str, int] = {}
     84 + for r in search_res:
     85 + if r and isinstance(r, dict) and r.get('status'):
     86 + if not isinstance(r['status'], QueryResult):
     87 + continue
     88 + 
     89 + err = r['status'].error
     90 + if not err:
     91 + continue
     92 + errors_counts[err.type] = errors_counts.get(err.type, 0) + 1
     93 + 
     94 + counts = []
     95 + for err, count in sorted(errors_counts.items(), key=lambda x: x[1], reverse=True):
     96 + counts.append(
     97 + {
     98 + 'err': err,
     99 + 'count': count,
     100 + 'perc': round(count / len(search_res), 2) * 100,
     101 + }
     102 + )
     103 + 
     104 + return counts
     105 + 
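    The new maigret/errors.py module centralizes the known error signatures (COMMON_ERRORS), detects them in page text, and groups check errors across a finished search. A minimal usage sketch, assuming hypothetical usernames, site names, and URLs (QueryResult and QueryStatus come from maigret.result, matching the import above):

        from maigret import errors
        from maigret.errors import CheckError
        from maigret.result import QueryResult, QueryStatus

        # signature-based detection of a known error page
        err = errors.detect('<title>Attention Required! | Cloudflare</title>')
        print(err)  # Captcha error: Cloudflare

        # two fake per-site results: one blocked by a captcha, one claimed profile
        search_results = {
            'SiteA': {'status': QueryResult('user', 'SiteA', 'https://a.example/user',
                                            QueryStatus.UNKNOWN,
                                            error=CheckError('Captcha', 'Cloudflare'))},
            'SiteB': {'status': QueryResult('user', 'SiteB', 'https://b.example/user',
                                            QueryStatus.CLAIMED)},
        }

        for err_data in errors.extract_and_group(search_results.values()):
            if errors.is_important(err_data):  # error share >= THRESHOLD (3 percent)
                print(err_data['err'], err_data['count'], errors.solution_of(err_data['err']))

    With half of the checks failing on a captcha, the loop prints the error type, its count, and the suggested workaround from ERRORS_TYPES.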
  • maigret/executors.py
    skipped 1 lines
    2 2  import time
    3 3  import tqdm
    4 4  import sys
    5  -from typing import Iterable
     5 +from typing import Iterable, Any, List
    6 6   
    7 7  from .types import QueryDraft
    8 8   
    skipped 91 lines
    100 100   self.queue.task_done()
    101 101   
    102 102   async def _run(self, queries: Iterable[QueryDraft]):
    103  - self.results = []
     103 + self.results: List[Any] = []
    104 104   
    105 105   queries_list = list(queries)
    106 106   
    107 107   min_workers = min(len(queries_list), self.workers_count)
    108 108   
    109  - workers = [create_task_func()(self.worker())
    110  - for _ in range(min_workers)]
     109 + workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
    111 110   
    112 111   self.progress = self.progress_func(total=len(queries_list))
    113 112   for t in queries_list:
    skipped 7 lines
  • maigret/maigret.py
    skipped 11 lines
    12 12  import requests
    13 13  from socid_extractor import extract, parse, __version__ as socid_version
    14 14   
    15  -from .checking import timeout_check, supported_recursive_search_ids, self_check, unsupported_characters, maigret
     15 +from .checking import (
     16 + timeout_check,
     17 + supported_recursive_search_ids,
     18 + self_check,
     19 + unsupported_characters,
     20 + maigret,
     21 +)
     22 +from . import errors
    16 23  from .notify import QueryNotifyPrint
    17  -from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
    18  - generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, check_supported_json_format, \
    19  - save_json_report
     24 +from .report import (
     25 + save_csv_report,
     26 + save_xmind_report,
     27 + save_html_report,
     28 + save_pdf_report,
     29 + generate_report_context,
     30 + save_txt_report,
     31 + SUPPORTED_JSON_REPORT_FORMATS,
     32 + check_supported_json_format,
     33 + save_json_report,
     34 +)
    20 35  from .sites import MaigretDatabase
    21 36  from .submit import submit_dialog
    22 37  from .utils import get_dict_ascii_tree
    skipped 1 lines
    24 39  __version__ = '0.1.19'
    25 40   
    26 41   
     42 +def notify_about_errors(search_results, query_notify):
     43 + errs = errors.extract_and_group(search_results.values())
     44 + was_errs_displayed = False
     45 + for e in errs:
     46 + if not errors.is_important(e):
     47 + continue
     48 + text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
     49 + solution = errors.solution_of(e['err'])
     50 + if solution:
     51 + text = '. '.join([text, solution])
     52 + 
     53 + query_notify.warning(text, '!')
     54 + was_errs_displayed = True
     55 + 
     56 + if was_errs_displayed:
     57 + query_notify.warning(
     58 + 'You can see detailed site check errors with a flag `--print-errors`'
     59 + )
     60 + 
     61 + 
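     # A hypothetical call site for the helper above (the actual call is outside the
     # hunks shown in this diff): after a search produces the per-site results dict
     # (site name -> results_info), frequent error types are grouped and reported
     # through the active notifier, e.g.
     #
     #     notify_about_errors(search_results, query_notify)
     #
     # where `search_results` is the per-site results dict and `query_notify` is the
     # QueryNotifyPrint instance created in main().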
    27 62  async def main():
    28  - version_string = '\n'.join([
    29  - f'%(prog)s {__version__}',
    30  - f'Socid-extractor: {socid_version}',
    31  - f'Aiohttp: {aiohttp.__version__}',
    32  - f'Requests: {requests.__version__}',
    33  - f'Python: {platform.python_version()}',
    34  - ])
     63 + version_string = '\n'.join(
     64 + [
     65 + f'%(prog)s {__version__}',
     66 + f'Socid-extractor: {socid_version}',
     67 + f'Aiohttp: {aiohttp.__version__}',
     68 + f'Requests: {requests.__version__}',
     69 + f'Python: {platform.python_version()}',
     70 + ]
     71 + )
    35 72   
    36  - parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
    37  - description=f"Maigret v{__version__}"
    38  - )
    39  - parser.add_argument("--version",
    40  - action="version", version=version_string,
    41  - help="Display version information and dependencies."
    42  - )
    43  - parser.add_argument("--info", "-vv",
    44  - action="store_true", dest="info", default=False,
    45  - help="Display service information."
    46  - )
    47  - parser.add_argument("--verbose", "-v",
    48  - action="store_true", dest="verbose", default=False,
    49  - help="Display extra information and metrics."
    50  - )
    51  - parser.add_argument("-d", "--debug", "-vvv",
    52  - action="store_true", dest="debug", default=False,
    53  - help="Saving debugging information and sites responses in debug.txt."
    54  - )
    55  - parser.add_argument("--site",
    56  - action="append", metavar='SITE_NAME',
    57  - dest="site_list", default=[],
    58  - help="Limit analysis to just the listed sites (use several times to specify more than one)"
    59  - )
    60  - parser.add_argument("--proxy", "-p", metavar='PROXY_URL',
    61  - action="store", dest="proxy", default=None,
    62  - help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
    63  - )
    64  - parser.add_argument("--db", metavar="DB_FILE",
    65  - dest="db_file", default=None,
    66  - help="Load Maigret database from a JSON file or an online, valid, JSON file.")
    67  - parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE",
    68  - dest="cookie_file", default=None,
    69  - help="File with cookies.")
    70  - parser.add_argument("--timeout",
    71  - action="store", metavar='TIMEOUT',
    72  - dest="timeout", type=timeout_check, default=10,
    73  - help="Time (in seconds) to wait for response to requests."
    74  - "Default timeout of 10.0s. "
    75  - "A longer timeout will be more likely to get results from slow sites."
    76  - "On the other hand, this may cause a long delay to gather all results."
    77  - )
    78  - parser.add_argument("-n", "--max-connections",
    79  - action="store", type=int,
    80  - dest="connections", default=100,
    81  - help="Allowed number of concurrent connections."
    82  - )
    83  - parser.add_argument("-a", "--all-sites",
    84  - action="store_true", dest="all_sites", default=False,
    85  - help="Use all sites for scan."
    86  - )
    87  - parser.add_argument("--top-sites",
    88  - action="store", default=500, type=int,
    89  - help="Count of sites for scan ranked by Alexa Top (default: 500)."
    90  - )
    91  - parser.add_argument("--print-not-found",
    92  - action="store_true", dest="print_not_found", default=False,
    93  - help="Print sites where the username was not found."
    94  - )
    95  - parser.add_argument("--print-errors",
    96  - action="store_true", dest="print_check_errors", default=False,
    97  - help="Print errors messages: connection, captcha, site country ban, etc."
    98  - )
    99  - parser.add_argument("--submit", metavar='EXISTING_USER_URL',
    100  - type=str, dest="new_site_to_submit", default=False,
    101  - help="URL of existing profile in new site to submit."
    102  - )
    103  - parser.add_argument("--no-color",
    104  - action="store_true", dest="no_color", default=False,
    105  - help="Don't color terminal output"
    106  - )
    107  - parser.add_argument("--no-progressbar",
    108  - action="store_true", dest="no_progressbar", default=False,
    109  - help="Don't show progressbar."
    110  - )
    111  - parser.add_argument("--browse", "-b",
    112  - action="store_true", dest="browse", default=False,
    113  - help="Browse to all results on default bowser."
    114  - )
    115  - parser.add_argument("--no-recursion",
    116  - action="store_true", dest="disable_recursive_search", default=False,
    117  - help="Disable recursive search by additional data extracted from pages."
    118  - )
    119  - parser.add_argument("--no-extracting",
    120  - action="store_true", dest="disable_extracting", default=False,
    121  - help="Disable parsing pages for additional data and other usernames."
    122  - )
    123  - parser.add_argument("--self-check",
    124  - action="store_true", default=False,
    125  - help="Do self check for sites and database and disable non-working ones."
    126  - )
    127  - parser.add_argument("--stats",
    128  - action="store_true", default=False,
    129  - help="Show database statistics."
    130  - )
    131  - parser.add_argument("--use-disabled-sites",
    132  - action="store_true", default=False,
    133  - help="Use disabled sites to search (may cause many false positives)."
    134  - )
    135  - parser.add_argument("--parse",
    136  - dest="parse_url", default='',
    137  - help="Parse page by URL and extract username and IDs to use for search."
    138  - )
    139  - parser.add_argument("--id-type",
    140  - dest="id_type", default='username',
    141  - help="Specify identifier(s) type (default: username)."
    142  - )
    143  - parser.add_argument("--ignore-ids",
    144  - action="append", metavar='IGNORED_IDS',
    145  - dest="ignore_ids_list", default=[],
    146  - help="Do not make search by the specified username or other ids."
    147  - )
    148  - parser.add_argument("username",
    149  - nargs='+', metavar='USERNAMES',
    150  - action="store",
    151  - help="One or more usernames to check with social networks."
    152  - )
    153  - parser.add_argument("--tags",
    154  - dest="tags", default='',
    155  - help="Specify tags of sites."
    156  - )
     73 + parser = ArgumentParser(
     74 + formatter_class=RawDescriptionHelpFormatter,
     75 + description=f"Maigret v{__version__}",
     76 + )
     77 + parser.add_argument(
     78 + "--version",
     79 + action="version",
     80 + version=version_string,
     81 + help="Display version information and dependencies.",
     82 + )
     83 + parser.add_argument(
     84 + "--info",
     85 + "-vv",
     86 + action="store_true",
     87 + dest="info",
     88 + default=False,
     89 + help="Display service information.",
     90 + )
     91 + parser.add_argument(
     92 + "--verbose",
     93 + "-v",
     94 + action="store_true",
     95 + dest="verbose",
     96 + default=False,
     97 + help="Display extra information and metrics.",
     98 + )
     99 + parser.add_argument(
     100 + "-d",
     101 + "--debug",
     102 + "-vvv",
     103 + action="store_true",
     104 + dest="debug",
     105 + default=False,
     106 + help="Saving debugging information and sites responses in debug.txt.",
     107 + )
     108 + parser.add_argument(
     109 + "--site",
     110 + action="append",
     111 + metavar='SITE_NAME',
     112 + dest="site_list",
     113 + default=[],
     114 + help="Limit analysis to just the listed sites (use several times to specify more than one)",
     115 + )
     116 + parser.add_argument(
     117 + "--proxy",
     118 + "-p",
     119 + metavar='PROXY_URL',
     120 + action="store",
     121 + dest="proxy",
     122 + default=None,
     123 + help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
     124 + )
     125 + parser.add_argument(
     126 + "--db",
     127 + metavar="DB_FILE",
     128 + dest="db_file",
     129 + default=None,
     130 + help="Load Maigret database from a JSON file or an online, valid, JSON file.",
     131 + )
     132 + parser.add_argument(
     133 + "--cookies-jar-file",
     134 + metavar="COOKIE_FILE",
     135 + dest="cookie_file",
     136 + default=None,
     137 + help="File with cookies.",
     138 + )
     139 + parser.add_argument(
     140 + "--timeout",
     141 + action="store",
     142 + metavar='TIMEOUT',
     143 + dest="timeout",
     144 + type=timeout_check,
     145 + default=30,
     146 + help="Time (in seconds) to wait for response to requests. "
     147 + "Default timeout of 30.0s. "
     148 + "A longer timeout will be more likely to get results from slow sites. "
     149 + "On the other hand, this may cause a long delay to gather all results. ",
     150 + )
     151 + parser.add_argument(
     152 + "-n",
     153 + "--max-connections",
     154 + action="store",
     155 + type=int,
     156 + dest="connections",
     157 + default=100,
     158 + help="Allowed number of concurrent connections.",
     159 + )
     160 + parser.add_argument(
     161 + "-a",
     162 + "--all-sites",
     163 + action="store_true",
     164 + dest="all_sites",
     165 + default=False,
     166 + help="Use all sites for scan.",
     167 + )
     168 + parser.add_argument(
     169 + "--top-sites",
     170 + action="store",
     171 + default=500,
     172 + type=int,
     173 + help="Count of sites for scan ranked by Alexa Top (default: 500).",
     174 + )
     175 + parser.add_argument(
     176 + "--print-not-found",
     177 + action="store_true",
     178 + dest="print_not_found",
     179 + default=False,
     180 + help="Print sites where the username was not found.",
     181 + )
     182 + parser.add_argument(
     183 + "--print-errors",
     184 + action="store_true",
     185 + dest="print_check_errors",
     186 + default=False,
     187 + help="Print errors messages: connection, captcha, site country ban, etc.",
     188 + )
     189 + parser.add_argument(
     190 + "--submit",
     191 + metavar='EXISTING_USER_URL',
     192 + type=str,
     193 + dest="new_site_to_submit",
     194 + default=False,
     195 + help="URL of existing profile in new site to submit.",
     196 + )
     197 + parser.add_argument(
     198 + "--no-color",
     199 + action="store_true",
     200 + dest="no_color",
     201 + default=False,
     202 + help="Don't color terminal output",
     203 + )
     204 + parser.add_argument(
     205 + "--no-progressbar",
     206 + action="store_true",
     207 + dest="no_progressbar",
     208 + default=False,
     209 + help="Don't show progressbar.",
     210 + )
     211 + parser.add_argument(
     212 + "--browse",
     213 + "-b",
     214 + action="store_true",
     215 + dest="browse",
     216 + default=False,
     217 + help="Browse to all results on default bowser.",
     218 + )
     219 + parser.add_argument(
     220 + "--no-recursion",
     221 + action="store_true",
     222 + dest="disable_recursive_search",
     223 + default=False,
     224 + help="Disable recursive search by additional data extracted from pages.",
     225 + )
     226 + parser.add_argument(
     227 + "--no-extracting",
     228 + action="store_true",
     229 + dest="disable_extracting",
     230 + default=False,
     231 + help="Disable parsing pages for additional data and other usernames.",
     232 + )
     233 + parser.add_argument(
     234 + "--self-check",
     235 + action="store_true",
     236 + default=False,
     237 + help="Do self check for sites and database and disable non-working ones.",
     238 + )
     239 + parser.add_argument(
     240 + "--stats", action="store_true", default=False, help="Show database statistics."
     241 + )
     242 + parser.add_argument(
     243 + "--use-disabled-sites",
     244 + action="store_true",
     245 + default=False,
     246 + help="Use disabled sites to search (may cause many false positives).",
     247 + )
     248 + parser.add_argument(
     249 + "--parse",
     250 + dest="parse_url",
     251 + default='',
     252 + help="Parse page by URL and extract username and IDs to use for search.",
     253 + )
     254 + parser.add_argument(
     255 + "--id-type",
     256 + dest="id_type",
     257 + default='username',
     258 + help="Specify identifier(s) type (default: username).",
     259 + )
     260 + parser.add_argument(
     261 + "--ignore-ids",
     262 + action="append",
     263 + metavar='IGNORED_IDS',
     264 + dest="ignore_ids_list",
     265 + default=[],
     266 + help="Do not make search by the specified username or other ids.",
     267 + )
     268 + parser.add_argument(
     269 + "username",
     270 + nargs='+',
     271 + metavar='USERNAMES',
     272 + action="store",
     273 + help="One or more usernames to check with social networks.",
     274 + )
     275 + parser.add_argument(
     276 + "--tags", dest="tags", default='', help="Specify tags of sites."
     277 + )
    157 278   # reports options
    158  - parser.add_argument("--folderoutput", "-fo", dest="folderoutput", default="reports",
    159  - help="If using multiple usernames, the output of the results will be saved to this folder."
    160  - )
    161  - parser.add_argument("-T", "--txt",
    162  - action="store_true", dest="txt", default=False,
    163  - help="Create a TXT report (one report per username)."
    164  - )
    165  - parser.add_argument("-C", "--csv",
    166  - action="store_true", dest="csv", default=False,
    167  - help="Create a CSV report (one report per username)."
    168  - )
    169  - parser.add_argument("-H", "--html",
    170  - action="store_true", dest="html", default=False,
    171  - help="Create an HTML report file (general report on all usernames)."
    172  - )
    173  - parser.add_argument("-X", "--xmind",
    174  - action="store_true",
    175  - dest="xmind", default=False,
    176  - help="Generate an XMind 8 mindmap report (one report per username)."
    177  - )
    178  - parser.add_argument("-P", "--pdf",
    179  - action="store_true",
    180  - dest="pdf", default=False,
    181  - help="Generate a PDF report (general report on all usernames)."
    182  - )
    183  - parser.add_argument("-J", "--json",
    184  - action="store", metavar='REPORT_TYPE',
    185  - dest="json", default='', type=check_supported_json_format,
    186  - help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
    187  - " (one report per username)."
    188  - )
     279 + parser.add_argument(
     280 + "--folderoutput",
     281 + "-fo",
     282 + dest="folderoutput",
     283 + default="reports",
     284 + help="If using multiple usernames, the output of the results will be saved to this folder.",
     285 + )
     286 + parser.add_argument(
     287 + "-T",
     288 + "--txt",
     289 + action="store_true",
     290 + dest="txt",
     291 + default=False,
     292 + help="Create a TXT report (one report per username).",
     293 + )
     294 + parser.add_argument(
     295 + "-C",
     296 + "--csv",
     297 + action="store_true",
     298 + dest="csv",
     299 + default=False,
     300 + help="Create a CSV report (one report per username).",
     301 + )
     302 + parser.add_argument(
     303 + "-H",
     304 + "--html",
     305 + action="store_true",
     306 + dest="html",
     307 + default=False,
     308 + help="Create an HTML report file (general report on all usernames).",
     309 + )
     310 + parser.add_argument(
     311 + "-X",
     312 + "--xmind",
     313 + action="store_true",
     314 + dest="xmind",
     315 + default=False,
     316 + help="Generate an XMind 8 mindmap report (one report per username).",
     317 + )
     318 + parser.add_argument(
     319 + "-P",
     320 + "--pdf",
     321 + action="store_true",
     322 + dest="pdf",
     323 + default=False,
     324 + help="Generate a PDF report (general report on all usernames).",
     325 + )
     326 + parser.add_argument(
     327 + "-J",
     328 + "--json",
     329 + action="store",
     330 + metavar='REPORT_TYPE',
     331 + dest="json",
     332 + default='',
     333 + type=check_supported_json_format,
     334 + help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
     335 + " (one report per username).",
     336 + )
    189 337   
    190 338   args = parser.parse_args()
    191 339   
    skipped 2 lines
    194 342   logging.basicConfig(
    195 343   format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
    196 344   datefmt='%H:%M:%S',
    197  - level=log_level
     345 + level=log_level,
    198 346   )
    199 347   
    200 348   if args.debug:
    skipped 10 lines
    211 359   usernames = {
    212 360   u: args.id_type
    213 361   for u in args.username
    214  - if u not in ['-']
    215  - and u not in args.ignore_ids_list
     362 + if u not in ['-'] and u not in args.ignore_ids_list
    216 363   }
    217 364   
    218 365   parsing_enabled = not args.disable_extracting
    skipped 9 lines
    228 375   try:
    229 376   # temporary workaround for URL mutations MVP
    230 377   from socid_extractor import mutate_url
     378 + 
    231 379   reqs += list(mutate_url(args.parse_url))
    232  - except:
     380 + except Exception as e:
     381 + logger.warning(e)
    233 382   pass
    234 383   
    235 384   for req in reqs:
    skipped 15 lines
    251 400   args.tags = list(set(str(args.tags).split(',')))
    252 401   
    253 402   if args.db_file is None:
    254  - args.db_file = \
    255  - os.path.join(os.path.dirname(os.path.realpath(__file__)),
    256  - "resources/data.json"
    257  - )
     403 + args.db_file = os.path.join(
     404 + os.path.dirname(os.path.realpath(__file__)), "resources/data.json"
     405 + )
    258 406   
    259 407   if args.top_sites == 0 or args.all_sites:
    260 408   args.top_sites = sys.maxsize
    261 409   
    262 410   # Create notify object for query results.
    263  - query_notify = QueryNotifyPrint(result=None,
    264  - verbose=args.verbose,
    265  - print_found_only=not args.print_not_found,
    266  - skip_check_errors=not args.print_check_errors,
    267  - color=not args.no_color)
     411 + query_notify = QueryNotifyPrint(
     412 + result=None,
     413 + verbose=args.verbose,
     414 + print_found_only=not args.print_not_found,
     415 + skip_check_errors=not args.print_check_errors,
     416 + color=not args.no_color,
     417 + )
    268 418   
    269 419   # Create object with all information about sites we are aware of.
    270 420   db = MaigretDatabase().load_from_file(args.db_file)
    271  - get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
    272  - names=args.site_list,
    273  - disabled=False, id_type=x)
     421 + get_top_sites_for_id = lambda x: db.ranked_sites_dict(
     422 + top=args.top_sites,
     423 + tags=args.tags,
     424 + names=args.site_list,
     425 + disabled=False,
     426 + id_type=x,
     427 + )
    274 428   
    275 429   site_data = get_top_sites_for_id(args.id_type)
    276 430   
    277 431   if args.new_site_to_submit:
    278  - is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file, logger)
     432 + is_submitted = await submit_dialog(
     433 + db, args.new_site_to_submit, args.cookie_file, logger
     434 + )
    279 435   if is_submitted:
    280 436   db.save_to_file(args.db_file)
    281 437   
    282 438   # Database self-checking
    283 439   if args.self_check:
    284 440   print('Maigret sites database self-checking...')
    285  - is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
     441 + is_need_update = await self_check(
     442 + db, site_data, logger, max_connections=args.connections
     443 + )
    286 444   if is_need_update:
    287 445   if input('Do you want to save changes permanently? [Yn]\n').lower() == 'y':
    288 446   db.save_to_file(args.db_file)
    skipped 25 lines
    314 472   query_notify.warning('No sites to check, exiting!')
    315 473   sys.exit(2)
    316 474   else:
    317  - query_notify.warning(f'Starting a search on top {len(site_data)} sites from the Maigret database...')
     475 + query_notify.warning(
     476 + f'Starting a search on the top {len(site_data)} sites from the Maigret database...'
     477 + )
    318 478   if not args.all_sites:
    319  - query_notify.warning(f'You can run search by full list of sites with flag `-a`', '!')
     479 + query_notify.warning(
     480 + 'You can run a search on the full list of sites with the `-a` flag', '!'
     481 + )
    320 482   
    321 483   already_checked = set()
    322 484   general_results = []
    skipped 8 lines
    331 493   already_checked.add(username.lower())
    332 494   
    333 495   if username in args.ignore_ids_list:
    334  - query_notify.warning(f'Skip a search by username {username} cause it\'s marked as ignored.')
     496 + query_notify.warning(
     497 + f'Skipping the search by username {username} because it is marked as ignored.'
     498 + )
    335 499   continue
    336 500   
    337 501   # check for characters not supported by sites in general
    338  - found_unsupported_chars = set(unsupported_characters).intersection(set(username))
     502 + found_unsupported_chars = set(unsupported_characters).intersection(
     503 + set(username)
     504 + )
    339 505   
    340 506   if found_unsupported_chars:
    341  - pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
     507 + pretty_chars_str = ','.join(
     508 + map(lambda s: f'"{s}"', found_unsupported_chars)
     509 + )
    342 510   query_notify.warning(
    343  - f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
     511 + f'Found unsupported URL characters: {pretty_chars_str}, skipping the search by username "{username}"'
     512 + )
    344 513   continue
    345 514   
    346 515   sites_to_check = get_top_sites_for_id(id_type)
    347 516   
    348  - results = await maigret(username=username,
    349  - site_dict=dict(sites_to_check),
    350  - query_notify=query_notify,
    351  - proxy=args.proxy,
    352  - timeout=args.timeout,
    353  - is_parsing_enabled=parsing_enabled,
    354  - id_type=id_type,
    355  - debug=args.verbose,
    356  - logger=logger,
    357  - cookies=args.cookie_file,
    358  - forced=args.use_disabled_sites,
    359  - max_connections=args.connections,
    360  - no_progressbar=args.no_progressbar,
    361  - )
     517 + results = await maigret(
     518 + username=username,
     519 + site_dict=dict(sites_to_check),
     520 + query_notify=query_notify,
     521 + proxy=args.proxy,
     522 + timeout=args.timeout,
     523 + is_parsing_enabled=parsing_enabled,
     524 + id_type=id_type,
     525 + debug=args.verbose,
     526 + logger=logger,
     527 + cookies=args.cookie_file,
     528 + forced=args.use_disabled_sites,
     529 + max_connections=args.connections,
     530 + no_progressbar=args.no_progressbar,
     531 + )
     532 + 
     533 + notify_about_errors(results, query_notify)
    362 534   
    363 535   general_results.append((username, id_type, results))
    364 536   
    skipped 32 lines
    397 569   query_notify.warning(f'TXT report for {username} saved in {filename}')
    398 570   
    399 571   if args.json:
    400  - filename = report_filepath_tpl.format(username=username, postfix=f'_{args.json}.json')
     572 + filename = report_filepath_tpl.format(
     573 + username=username, postfix=f'_{args.json}.json'
     574 + )
    401 575   save_json_report(filename, username, results, report_type=args.json)
    402  - query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
     576 + query_notify.warning(
     577 + f'JSON {args.json} report for {username} saved in {filename}'
     578 + )
    403 579   
    404 580   # reporting for all the result
    405 581   if general_results:
    skipped 31 lines
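    Note on the new notify_about_errors(results, query_notify) call above: its implementation is not part of this hunk, so the snippet below is only a minimal sketch of the idea named in the commit message ("notifications about frequent search errors"). The helper name notify_about_errors_sketch, the 0.3 threshold, and the warning wording are assumptions for illustration, not the actual code.

    from collections import Counter

    def notify_about_errors_sketch(results: dict, query_notify, threshold: float = 0.3):
        # Count check errors by error text across all site results.
        errors = Counter()
        for site_result in results.values():
            status = site_result.get("status")
            if status is not None and status.error:
                errors[str(status.error)] += 1
        total = len(results) or 1
        # Warn the user when a single error dominates the run
        # (typical causes: broken proxy, censorship, rate limiting).
        for err_text, count in errors.most_common(1):
            if count / total >= threshold:
                query_notify.warning(
                    f"Too many errors of the same type ({count}/{total} sites): {err_text}",
                    "!",
                )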
  • ■ ■ ■ ■ ■ ■
    maigret/notify.py
    skipped 10 lines
    11 11  from .utils import get_dict_ascii_tree
    12 12   
    13 13   
    14  -class QueryNotify():
     14 +class QueryNotify:
    15 15   """Query Notify Object.
    16 16   
    17 17   Base class that describes methods available to notify the results of
    skipped 21 lines
    39 39   
    40 40   return
    41 41   
    42  - def start(self, message=None, id_type='username'):
     42 + def start(self, message=None, id_type="username"):
    43 43   """Notify Start.
    44 44   
    45 45   Notify method for start of query. This method will be called before
    skipped 70 lines
    116 116   Query notify class that prints results.
    117 117   """
    118 118   
    119  - def __init__(self, result=None, verbose=False, print_found_only=False,
    120  - skip_check_errors=False, color=True):
     119 + def __init__(
     120 + self,
     121 + result=None,
     122 + verbose=False,
     123 + print_found_only=False,
     124 + skip_check_errors=False,
     125 + color=True,
     126 + ):
    121 127   """Create Query Notify Print Object.
    122 128   
    123 129   Contains information about a specific method of notifying the results
    skipped 38 lines
    162 168   
    163 169   title = f"Checking {id_type}"
    164 170   if self.color:
    165  - print(Style.BRIGHT + Fore.GREEN + "[" +
    166  - Fore.YELLOW + "*" +
    167  - Fore.GREEN + f"] {title}" +
    168  - Fore.WHITE + f" {message}" +
    169  - Fore.GREEN + " on:")
     171 + print(
     172 + Style.BRIGHT
     173 + + Fore.GREEN
     174 + + "["
     175 + + Fore.YELLOW
     176 + + "*"
     177 + + Fore.GREEN
     178 + + f"] {title}"
     179 + + Fore.WHITE
     180 + + f" {message}"
     181 + + Fore.GREEN
     182 + + " on:"
     183 + )
    170 184   else:
    171 185   print(f"[*] {title} {message} on:")
    172 186   
    173  - def warning(self, message, symbol='-'):
    174  - msg = f'[{symbol}] {message}'
     187 + def warning(self, message, symbol="-"):
     188 + msg = f"[{symbol}] {message}"
    175 189   if self.color:
    176 190   print(Style.BRIGHT + Fore.YELLOW + msg)
    177 191   else:
    178 192   print(msg)
    179  - 
    180 193   
    181 194   def update(self, result, is_similar=False):
    182 195   """Notify Update.
    skipped 13 lines
    196 209   if not self.result.ids_data:
    197 210   ids_data_text = ""
    198 211   else:
    199  - ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), ' ')
     212 + ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ")
    200 213   
    201  - def make_colored_terminal_notify(status, text, status_color, text_color, appendix):
     214 + def make_colored_terminal_notify(
     215 + status, text, status_color, text_color, appendix
     216 + ):
    202 217   text = [
    203  - f'{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]' +
    204  - f'{text_color} {text}: {Style.RESET_ALL}' +
    205  - f'{appendix}'
     218 + f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]"
     219 + + f"{text_color} {text}: {Style.RESET_ALL}"
     220 + + f"{appendix}"
    206 221   ]
    207  - return ''.join(text)
     222 + return "".join(text)
    208 223   
    209 224   def make_simple_terminal_notify(status, text, appendix):
    210  - return f'[{status}] {text}: {appendix}'
     225 + return f"[{status}] {text}: {appendix}"
    211 226   
    212 227   def make_terminal_notify(is_colored=True, *args):
    213 228   if is_colored:
    skipped 6 lines
    220 235   # Output to the terminal is desired.
    221 236   if result.status == QueryStatus.CLAIMED:
    222 237   color = Fore.BLUE if is_similar else Fore.GREEN
    223  - status = '?' if is_similar else '+'
     238 + status = "?" if is_similar else "+"
    224 239   notify = make_terminal_notify(
    225 240   self.color,
    226  - status, result.site_name,
    227  - color, color,
    228  - result.site_url_user + ids_data_text
     241 + status,
     242 + result.site_name,
     243 + color,
     244 + color,
     245 + result.site_url_user + ids_data_text,
    229 246   )
    230 247   elif result.status == QueryStatus.AVAILABLE:
    231 248   if not self.print_found_only:
    232 249   notify = make_terminal_notify(
    233 250   self.color,
    234  - '-', result.site_name,
    235  - Fore.RED, Fore.YELLOW,
    236  - 'Not found!' + ids_data_text
     251 + "-",
     252 + result.site_name,
     253 + Fore.RED,
     254 + Fore.YELLOW,
     255 + "Not found!" + ids_data_text,
    237 256   )
    238 257   elif result.status == QueryStatus.UNKNOWN:
    239 258   if not self.skip_check_errors:
    240 259   notify = make_terminal_notify(
    241 260   self.color,
    242  - '?', result.site_name,
    243  - Fore.RED, Fore.RED,
    244  - str(self.result.error) + ids_data_text
     261 + "?",
     262 + result.site_name,
     263 + Fore.RED,
     264 + Fore.RED,
     265 + str(self.result.error) + ids_data_text,
    245 266   )
    246 267   elif result.status == QueryStatus.ILLEGAL:
    247 268   if not self.print_found_only:
    248  - text = 'Illegal Username Format For This Site!'
     269 + text = "Illegal Username Format For This Site!"
    249 270   notify = make_terminal_notify(
    250 271   self.color,
    251  - '-', result.site_name,
    252  - Fore.RED, Fore.YELLOW,
    253  - text + ids_data_text
     272 + "-",
     273 + result.site_name,
     274 + Fore.RED,
     275 + Fore.YELLOW,
     276 + text + ids_data_text,
    254 277   )
    255 278   else:
    256 279   # It should be impossible to ever get here...
    257  - raise ValueError(f"Unknown Query Status '{str(result.status)}' for "
    258  - f"site '{self.result.site_name}'")
     280 + raise ValueError(
     281 + f"Unknown Query Status '{str(result.status)}' for "
     282 + f"site '{self.result.site_name}'"
     283 + )
    259 284   
    260 285   if notify:
    261  - sys.stdout.write('\x1b[1K\r')
     286 + sys.stdout.write("\x1b[1K\r")
    262 287   print(notify)
    263 288   
    264 289   return
    skipped 14 lines
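    For reference, a usage sketch of the reformatted QueryNotifyPrint (constructor arguments as shown above; the exact console output depends on the terminal/colorama setup):

    from maigret.notify import QueryNotifyPrint

    notify = QueryNotifyPrint(
        result=None,
        verbose=False,
        print_found_only=True,   # suppress "Not found!" lines in update()
        skip_check_errors=True,  # suppress sites that returned check errors
        color=False,             # plain text markers instead of ANSI colors
    )
    notify.warning("Starting a search...", "!")  # prints: [!] Starting a search...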
  • ■ ■ ■ ■ ■ ■
    maigret/report.py
    skipped 4 lines
    5 5  import os
    6 6  from argparse import ArgumentTypeError
    7 7  from datetime import datetime
     8 +from typing import Dict, Any
    8 9   
    9 10  import pycountry
    10 11  import xmind
    skipped 5 lines
    16 17  from .utils import is_country_tag, CaseConverter, enrich_link_str
    17 18   
    18 19  SUPPORTED_JSON_REPORT_FORMATS = [
    19  - 'simple',
    20  - 'ndjson',
     20 + "simple",
     21 + "ndjson",
    21 22  ]
    22 23   
    23  -'''
     24 +"""
    24 25  UTILS
    25  -'''
     26 +"""
    26 27   
    27 28   
    28 29  def filter_supposed_data(data):
    29  - ### interesting fields
    30  - allowed_fields = ['fullname', 'gender', 'location', 'age']
    31  - filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0]
    32  - for k, v in data.items()
    33  - if k in allowed_fields}
     30 + # interesting fields
     31 + allowed_fields = ["fullname", "gender", "location", "age"]
     32 + filtered_supposed_data = {
     33 + CaseConverter.snake_to_title(k): v[0]
     34 + for k, v in data.items()
     35 + if k in allowed_fields
     36 + }
    34 37   return filtered_supposed_data
    35 38   
    36 39   
    37  -'''
     40 +"""
    38 41  REPORTS SAVING
    39  -'''
     42 +"""
    40 43   
    41 44   
    42 45  def save_csv_report(filename: str, username: str, results: dict):
    43  - with open(filename, 'w', newline='', encoding='utf-8') as f:
     46 + with open(filename, "w", newline="", encoding="utf-8") as f:
    44 47   generate_csv_report(username, results, f)
    45 48   
    46 49   
    47 50  def save_txt_report(filename: str, username: str, results: dict):
    48  - with open(filename, 'w', encoding='utf-8') as f:
     51 + with open(filename, "w", encoding="utf-8") as f:
    49 52   generate_txt_report(username, results, f)
    50 53   
    51 54   
    52 55  def save_html_report(filename: str, context: dict):
    53 56   template, _ = generate_report_template(is_pdf=False)
    54 57   filled_template = template.render(**context)
    55  - with open(filename, 'w') as f:
     58 + with open(filename, "w") as f:
    56 59   f.write(filled_template)
    57 60   
    58 61   
    59 62  def save_pdf_report(filename: str, context: dict):
    60 63   template, css = generate_report_template(is_pdf=True)
    61 64   filled_template = template.render(**context)
    62  - with open(filename, 'w+b') as f:
     65 + with open(filename, "w+b") as f:
    63 66   pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
    64 67   
    65 68   
    66 69  def save_json_report(filename: str, username: str, results: dict, report_type: str):
    67  - with open(filename, 'w', encoding='utf-8') as f:
     70 + with open(filename, "w", encoding="utf-8") as f:
    68 71   generate_json_report(username, results, f, report_type=report_type)
    69 72   
    70 73   
    71  -'''
     74 +"""
    72 75  REPORTS GENERATING
    73  -'''
     76 +"""
    74 77   
    75 78   
    76 79  def generate_report_template(is_pdf: bool):
    77 80   """
    78  - HTML/PDF template generation
     81 + HTML/PDF template generation
    79 82   """
    80 83   
    81 84   def get_resource_content(filename):
    82  - return open(os.path.join(maigret_path, 'resources', filename)).read()
     85 + return open(os.path.join(maigret_path, "resources", filename)).read()
    83 86   
    84 87   maigret_path = os.path.dirname(os.path.realpath(__file__))
    85 88   
    86 89   if is_pdf:
    87  - template_content = get_resource_content('simple_report_pdf.tpl')
    88  - css_content = get_resource_content('simple_report_pdf.css')
     90 + template_content = get_resource_content("simple_report_pdf.tpl")
     91 + css_content = get_resource_content("simple_report_pdf.css")
    89 92   else:
    90  - template_content = get_resource_content('simple_report.tpl')
     93 + template_content = get_resource_content("simple_report.tpl")
    91 94   css_content = None
    92 95   
    93 96   template = Template(template_content)
    94  - template.globals['title'] = CaseConverter.snake_to_title
    95  - template.globals['detect_link'] = enrich_link_str
     97 + template.globals["title"] = CaseConverter.snake_to_title # type: ignore
     98 + template.globals["detect_link"] = enrich_link_str # type: ignore
    96 99   return template, css_content
    97 100   
    98 101   
    skipped 1 lines
    100 103   brief_text = []
    101 104   usernames = {}
    102 105   extended_info_count = 0
    103  - tags = {}
    104  - supposed_data = {}
     106 + tags: Dict[str, int] = {}
     107 + supposed_data: Dict[str, Any] = {}
    105 108   
    106 109   first_seen = None
    107 110   
    108 111   for username, id_type, results in username_results:
    109 112   found_accounts = 0
    110 113   new_ids = []
    111  - usernames[username] = {'type': id_type}
     114 + usernames[username] = {"type": id_type}
    112 115   
    113 116   for website_name in results:
    114 117   dictionary = results[website_name]
    skipped 1 lines
    116 119   if not dictionary:
    117 120   continue
    118 121   
    119  - if dictionary.get('is_similar'):
     122 + if dictionary.get("is_similar"):
    120 123   continue
    121 124   
    122  - status = dictionary.get('status')
     125 + status = dictionary.get("status")
    123 126   if not status: # FIXME: currently in case of timeout
    124 127   continue
    125 128   
    126 129   if status.ids_data:
    127  - dictionary['ids_data'] = status.ids_data
     130 + dictionary["ids_data"] = status.ids_data
    128 131   extended_info_count += 1
    129 132   
    130 133   # detect first seen
    131  - created_at = status.ids_data.get('created_at')
     134 + created_at = status.ids_data.get("created_at")
    132 135   if created_at:
    133 136   if first_seen is None:
    134 137   first_seen = created_at
    skipped 3 lines
    138 141   new_time = parse_datetime_str(created_at)
    139 142   if new_time < known_time:
    140 143   first_seen = created_at
    141  - except:
    142  - logging.debug('Problems with converting datetime %s/%s', first_seen, created_at)
     144 + except Exception as e:
     145 + logging.debug(
     146 + "Problems with converting datetime %s/%s: %s",
     147 + first_seen,
     148 + created_at,
     149 + str(e),
     150 + )
    143 151   
    144 152   for k, v in status.ids_data.items():
    145 153   # suppose target data
    146  - field = 'fullname' if k == 'name' else k
    147  - if not field in supposed_data:
     154 + field = "fullname" if k == "name" else k
     155 + if field not in supposed_data:
    148 156   supposed_data[field] = []
    149 157   supposed_data[field].append(v)
    150 158   # suppose country
    151  - if k in ['country', 'locale']:
     159 + if k in ["country", "locale"]:
    152 160   try:
    153 161   if is_country_tag(k):
    154 162   tag = pycountry.countries.get(alpha_2=v).alpha_2.lower()
    155 163   else:
    156  - tag = pycountry.countries.search_fuzzy(v)[0].alpha_2.lower()
     164 + tag = pycountry.countries.search_fuzzy(v)[
     165 + 0
     166 + ].alpha_2.lower()
    157 167   # TODO: move countries to another struct
    158 168   tags[tag] = tags.get(tag, 0) + 1
    159 169   except Exception as e:
    160  - logging.debug('pycountry exception', exc_info=True)
     170 + logging.debug(
     171 + "Pycountry exception: %s", str(e), exc_info=True
     172 + )
    161 173   
    162  - new_usernames = dictionary.get('ids_usernames')
     174 + new_usernames = dictionary.get("ids_usernames")
    163 175   if new_usernames:
    164 176   for u, utype in new_usernames.items():
    165  - if not u in usernames:
     177 + if u not in usernames:
    166 178   new_ids.append((u, utype))
    167  - usernames[u] = {'type': utype}
     179 + usernames[u] = {"type": utype}
    168 180   
    169 181   if status.status == QueryStatus.CLAIMED:
    170 182   found_accounts += 1
    171  - dictionary['found'] = True
     183 + dictionary["found"] = True
    172 184   else:
    173 185   continue
    174 186   
    skipped 2 lines
    177 189   for t in status.tags:
    178 190   tags[t] = tags.get(t, 0) + 1
    179 191   
    180  - brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')
     192 + brief_text.append(
     193 + f"Search by {id_type} {username} returned {found_accounts} accounts."
     194 + )
    181 195   
    182 196   if new_ids:
    183 197   ids_list = []
    184 198   for u, t in new_ids:
    185  - ids_list.append(f'{u} ({t})' if t != 'username' else u)
    186  - brief_text.append(f'Found target\'s other IDs: ' + ', '.join(ids_list) + '.')
     199 + ids_list.append(f"{u} ({t})" if t != "username" else u)
     200 + brief_text.append("Found target's other IDs: " + ", ".join(ids_list) + ".")
    187 201   
    188  - brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
     202 + brief_text.append(f"Extended info extracted from {extended_info_count} accounts.")
    189 203   
    190  - brief = ' '.join(brief_text).strip()
     204 + brief = " ".join(brief_text).strip()
    191 205   tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
    192 206   
    193  - if 'global' in tags:
     207 + if "global" in tags:
    194 208   # remove tag 'global' useless for country detection
    195  - del tags['global']
     209 + del tags["global"]
    196 210   
    197 211   first_username = username_results[0][0]
    198 212   countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items()))
    skipped 2 lines
    201 215   filtered_supposed_data = filter_supposed_data(supposed_data)
    202 216   
    203 217   return {
    204  - 'username': first_username,
    205  - 'brief': brief,
    206  - 'results': username_results,
    207  - 'first_seen': first_seen,
    208  - 'interests_tuple_list': tuple_sort(interests_list),
    209  - 'countries_tuple_list': tuple_sort(countries_lists),
    210  - 'supposed_data': filtered_supposed_data,
    211  - 'generated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
     218 + "username": first_username,
     219 + "brief": brief,
     220 + "results": username_results,
     221 + "first_seen": first_seen,
     222 + "interests_tuple_list": tuple_sort(interests_list),
     223 + "countries_tuple_list": tuple_sort(countries_lists),
     224 + "supposed_data": filtered_supposed_data,
     225 + "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    212 226   }
    213 227   
    214 228   
    215 229  def generate_csv_report(username: str, results: dict, csvfile):
    216 230   writer = csv.writer(csvfile)
    217  - writer.writerow(['username',
    218  - 'name',
    219  - 'url_main',
    220  - 'url_user',
    221  - 'exists',
    222  - 'http_status'
    223  - ]
    224  - )
     231 + writer.writerow(
     232 + ["username", "name", "url_main", "url_user", "exists", "http_status"]
     233 + )
    225 234   for site in results:
    226  - writer.writerow([username,
    227  - site,
    228  - results[site]['url_main'],
    229  - results[site]['url_user'],
    230  - str(results[site]['status'].status),
    231  - results[site]['http_status'],
    232  - ])
     235 + writer.writerow(
     236 + [
     237 + username,
     238 + site,
     239 + results[site]["url_main"],
     240 + results[site]["url_user"],
     241 + str(results[site]["status"].status),
     242 + results[site]["http_status"],
     243 + ]
     244 + )
    233 245   
    234 246   
    235 247  def generate_txt_report(username: str, results: dict, file):
    skipped 6 lines
    242 254   if dictionary.get("status").status == QueryStatus.CLAIMED:
    243 255   exists_counter += 1
    244 256   file.write(dictionary["url_user"] + "\n")
    245  - file.write(f'Total Websites Username Detected On : {exists_counter}')
     257 + file.write(f"Total websites where the username was detected: {exists_counter}")
    246 258   
    247 259   
    248 260  def generate_json_report(username: str, results: dict, file, report_type):
    249  - exists_counter = 0
    250  - is_report_per_line = report_type.startswith('ndjson')
     261 + is_report_per_line = report_type.startswith("ndjson")
    251 262   all_json = {}
    252 263   
    253 264   for sitename in results:
    skipped 3 lines
    257 268   continue
    258 269   
    259 270   data = dict(site_result)
    260  - data['status'] = data['status'].json()
     271 + data["status"] = data["status"].json()
    261 272   
    262 273   if is_report_per_line:
    263  - data['sitename'] = sitename
    264  - file.write(json.dumps(data) + '\n')
     274 + data["sitename"] = sitename
     275 + file.write(json.dumps(data) + "\n")
    265 276   else:
    266 277   all_json[sitename] = data
    267 278   
    skipped 1 lines
    269 280   file.write(json.dumps(all_json))
    270 281   
    271 282   
    272  -'''
     283 +"""
    273 284  XMIND 8 Functions
    274  -'''
     285 +"""
    275 286   
    276 287   
    277 288  def save_xmind_report(filename, username, results):
    skipped 6 lines
    284 295   
    285 296   
    286 297  def design_sheet(sheet, username, results):
    287  - ##all tag list
    288 298   alltags = {}
    289 299   supposed_data = {}
    290 300   
    skipped 9 lines
    300 310   dictionary = results[website_name]
    301 311   
    302 312   if dictionary.get("status").status == QueryStatus.CLAIMED:
    303  - ## firsttime I found that entry
     313 + # first time this entry has been found
    304 314   for tag in dictionary.get("status").tags:
    305 315   if tag.strip() == "":
    306 316   continue
    skipped 22 lines
    329 339   # suppose target data
    330 340   if not isinstance(v, list):
    331 341   currentsublabel = userlink.addSubTopic()
    332  - field = 'fullname' if k == 'name' else k
    333  - if not field in supposed_data:
     342 + field = "fullname" if k == "name" else k
     343 + if field not in supposed_data:
    334 344   supposed_data[field] = []
    335 345   supposed_data[field].append(v)
    336 346   currentsublabel.setTitle("%s: %s" % (k, v))
    337 347   else:
    338 348   for currentval in v:
    339 349   currentsublabel = userlink.addSubTopic()
    340  - field = 'fullname' if k == 'name' else k
    341  - if not field in supposed_data:
     350 + field = "fullname" if k == "name" else k
     351 + if field not in supposed_data:
    342 352   supposed_data[field] = []
    343 353   supposed_data[field].append(currentval)
    344 354   currentsublabel.setTitle("%s: %s" % (k, currentval))
    345  - ### Add Supposed DATA
     355 + # add supposed data
    346 356   filterede_supposed_data = filter_supposed_data(supposed_data)
    347  - if (len(filterede_supposed_data) > 0):
     357 + if len(filterede_supposed_data) > 0:
    348 358   undefinedsection = root_topic1.addSubTopic()
    349 359   undefinedsection.setTitle("SUPPOSED DATA")
    350 360   for k, v in filterede_supposed_data.items():
    skipped 2 lines
    353 363   
    354 364   
    355 365  def check_supported_json_format(value):
    356  - if value and not value in SUPPORTED_JSON_REPORT_FORMATS:
    357  - raise ArgumentTypeError(f'JSON report type must be one of the following types: '
    358  - + ', '.join(SUPPORTED_JSON_REPORT_FORMATS))
     366 + if value and value not in SUPPORTED_JSON_REPORT_FORMATS:
     367 + raise ArgumentTypeError(
     368 + "JSON report type must be one of the following types: "
     369 + + ", ".join(SUPPORTED_JSON_REPORT_FORMATS)
     370 + )
    359 371   return value
    360 372   
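    A short reading sketch for the JSON reports produced by generate_json_report() above: the 'ndjson' type writes one JSON object per site and per line (with a 'sitename' field added), while 'simple' writes a single object keyed by site name. The file path below is only an example; the CLI builds the real name from the reports folder, the username, and the report type.

    import json

    # assuming an ndjson report was saved, e.g. via `-J ndjson` on the CLI
    with open("reports/report_alice_ndjson.json", encoding="utf-8") as f:
        for line in f:
            entry = json.loads(line)
            # entry["status"] is QueryResult.json(): username, site_name, url, status, ids, tags
            print(entry["sitename"], entry["status"]["status"], entry["status"]["url"])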
  • ■ ■ ■ ■ ■ ■
    maigret/resources/data.json
    skipped 12147 lines
    12148 12148   "us"
    12149 12149   ],
    12150 12150   "headers": {
    12151  - "authorization": "Bearer BQAEeuyBT6S535Anlx4wU-pfPjjgiE8r2e7j0eOSnwZjSvjFvQgDzxwV__03-WNbwxPKyGehoJ5pQCBwUqs"
     12151 + "authorization": "Bearer BQCe5Yx_Evl2m1Td_86SzknoVan7OZxN6y6WaR7xNrJb8vnZ5B7VZY401MdivLmCQcyv0LUkfo1M-15_m-E"
    12152 12152   },
    12153 12153   "errors": {
    12154 12154   "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
    skipped 1303 lines
    13458 13458   "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
    13459 13459   "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
    13460 13460   "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    13461  - "x-guest-token": "1387733472027070474"
     13461 + "x-guest-token": "1388029767388106752"
    13462 13462   },
    13463 13463   "errors": {
    13464 13464   "Bad guest token": "x-guest-token update required"
    skipped 196 lines
    13661 13661   "type": "vk_id",
    13662 13662   "checkType": "response_url",
    13663 13663   "alexaRank": 26,
     13664 + "source": "VK",
    13664 13665   "url": "https://vk.com/id{username}",
    13665 13666   "urlMain": "https://vk.com/",
    13666 13667   "usernameClaimed": "270433952",
    skipped 5 lines
    13672 13673   ],
    13673 13674   "checkType": "status_code",
    13674 13675   "alexaRank": 28938,
     13676 + "source": "VK",
    13675 13677   "url": "https://vkfaces.com/vk/user/{username}",
    13676 13678   "urlMain": "https://vkfaces.com",
    13677 13679   "usernameClaimed": "adam",
    skipped 157 lines
    13835 13837   "video"
    13836 13838   ],
    13837 13839   "headers": {
    13838  - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk2OTczNjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.yLRq0lhenTYfe0EKKJsk5HZJZt3ykUVNBGuiMCC5HR4"
     13840 + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk3NzM3NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.4O4QL4IsoiKl0Cz1310Qjo9WablDr5LIyMOPQgMS1XE"
    13839 13841   },
    13840 13842   "activation": {
    13841 13843   "url": "https://vimeo.com/_rv/viewer",
    skipped 2283 lines
    16125 16127   "gb",
    16126 16128   "uk"
    16127 16129   ],
    16128  - "checkType": "message",
    16129  - "absenceStrs": "The specified member cannot be found. Please enter a member's entire name.",
     16130 + "engine": "XenForo",
    16130 16131   "alexaRank": 12725,
    16131  - "url": "https://forums.overclockers.co.uk/members/?username={username}",
    16132 16132   "urlMain": "https://forums.overclockers.co.uk",
    16133 16133   "usernameClaimed": "adam",
    16134 16134   "usernameUnclaimed": "noonewouldeverusethis7"
    skipped 7612 lines
    23747 23747   ],
    23748 23748   "url": "https://opensea.io/accounts/{username}",
    23749 23749   "urlMain": "https://opensea.io",
     23750 + "usernameClaimed": "admin",
     23751 + "usernameUnclaimed": "noonewouldeverusethis7"
     23752 + },
     23753 + "SmiHub": {
     23754 + "checkType": "message",
     23755 + "presenseStrs": [
     23756 + "profile",
     23757 + "user-page",
     23758 + "user",
     23759 + " data-name=",
     23760 + "user__img"
     23761 + ],
     23762 + "absenceStrs": [
     23763 + "text-lg mb-3"
     23764 + ],
     23765 + "source": "Instagram",
     23766 + "url": "https://smihub.com/v/{username}",
     23767 + "urlMain": "https://smihub.com",
     23768 + "usernameClaimed": "blue",
     23769 + "usernameUnclaimed": "noonewouldeverusethis7"
     23770 + },
     23771 + "do100verno.info": {
     23772 + "checkType": "message",
     23773 + "presenseStrs": [
     23774 + "white-space: nowrap;"
     23775 + ],
     23776 + "absenceStrs": [
     23777 + "l-main",
     23778 + " l-mainDcL",
     23779 + " l-usrMenu"
     23780 + ],
     23781 + "url": "https://do100verno.info/card/{username}",
     23782 + "urlMain": "https://do100verno.info",
     23783 + "usernameClaimed": "ekostyle",
     23784 + "usernameUnclaimed": "noonewouldeverusethis7"
     23785 + },
     23786 + "www.kinokopilka.pro": {
     23787 + "checkType": "message",
     23788 + "presenseStrs": [
     23789 + "profile",
     23790 + "user",
     23791 + "people",
     23792 + "users",
     23793 + "/people"
     23794 + ],
     23795 + "url": "https://www.kinokopilka.pro/users/{username}",
     23796 + "urlMain": "https://www.kinokopilka.pro",
     23797 + "usernameClaimed": "admin",
     23798 + "usernameUnclaimed": "noonewouldeverusethis7"
     23799 + },
     23800 + "www.turpravda.com": {
     23801 + "checkType": "message",
     23802 + "presenseStrs": [
     23803 + "email",
     23804 + " name"
     23805 + ],
     23806 + "absenceStrs": [
     23807 + "Title",
     23808 + " Shortcut Icon",
     23809 + " submit"
     23810 + ],
     23811 + "url": "https://www.turpravda.com/profile/{username}",
     23812 + "urlMain": "https://www.turpravda.com",
    23750 23813   "usernameClaimed": "admin",
    23751 23814   "usernameUnclaimed": "noonewouldeverusethis7"
    23752 23815   }
    skipped 158 lines
  • ■ ■ ■ ■ ■ ■
    maigret/result.py
    skipped 9 lines
    10 10   
    11 11   Describes status of query about a given username.
    12 12   """
     13 + 
    13 14   CLAIMED = "Claimed" # Username Detected
    14 15   AVAILABLE = "Available" # Username Not Detected
    15 16   UNKNOWN = "Unknown" # Error Occurred While Trying To Detect Username
    skipped 11 lines
    27 28   return self.value
    28 29   
    29 30   
    30  -class QueryResult():
     31 +class QueryResult:
    31 32   """Query Result Object.
    32 33   
    33 34   Describes result of query about a given username.
    34 35   """
    35 36   
    36  - def __init__(self, username, site_name, site_url_user, status, ids_data=None,
    37  - query_time=None, context=None, error=None, tags=[]):
     37 + def __init__(
     38 + self,
     39 + username,
     40 + site_name,
     41 + site_url_user,
     42 + status,
     43 + ids_data=None,
     44 + query_time=None,
     45 + context=None,
     46 + error=None,
     47 + tags=[],
     48 + ):
    38 49   """Create Query Result Object.
    39 50   
    40 51   Contains information about a specific method of detecting usernames on
    skipped 36 lines
    77 88   
    78 89   def json(self):
    79 90   return {
    80  - 'username': self.username,
    81  - 'site_name': self.site_name,
    82  - 'url': self.site_url_user,
    83  - 'status': str(self.status),
    84  - 'ids': self.ids_data or {},
    85  - 'tags': self.tags,
     91 + "username": self.username,
     92 + "site_name": self.site_name,
     93 + "url": self.site_url_user,
     94 + "status": str(self.status),
     95 + "ids": self.ids_data or {},
     96 + "tags": self.tags,
    86 97   }
    87 98   
    88 99   def is_found(self):
    skipped 19 lines
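    For illustration, roughly what the reformatted QueryResult.json() returns (all values below are example data, not output from a real check):

    from maigret.result import QueryResult, QueryStatus

    result = QueryResult(
        username="alice",                          # example values only
        site_name="GitHub",
        site_url_user="https://github.com/alice",
        status=QueryStatus.CLAIMED,
        tags=["coding"],
    )
    print(result.json())
    # {'username': 'alice', 'site_name': 'GitHub', 'url': 'https://github.com/alice',
    #  'status': 'Claimed', 'ids': {}, 'tags': ['coding']}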
  • ■ ■ ■ ■ ■ ■
    maigret/sites.py
    1  -# -*- coding: future_annotations -*-
     1 +# ****************************** -*-
    2 2  """Maigret Sites Information"""
    3 3  import copy
    4 4  import json
    5 5  import sys
     6 +from typing import Optional
    6 7   
    7 8  import requests
    8 9   
    skipped 1 lines
    10 11   
    11 12  # TODO: move to data.json
    12 13  SUPPORTED_TAGS = [
    13  - 'gaming', 'coding', 'photo', 'music', 'blog', 'finance', 'freelance', 'dating',
    14  - 'tech', 'forum', 'porn', 'erotic', 'webcam', 'video', 'movies', 'hacking', 'art',
    15  - 'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport',
    16  - 'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified',
    17  - 'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent',
    18  - 'science', 'medicine', 'reading', 'stock',
     14 + "gaming",
     15 + "coding",
     16 + "photo",
     17 + "music",
     18 + "blog",
     19 + "finance",
     20 + "freelance",
     21 + "dating",
     22 + "tech",
     23 + "forum",
     24 + "porn",
     25 + "erotic",
     26 + "webcam",
     27 + "video",
     28 + "movies",
     29 + "hacking",
     30 + "art",
     31 + "discussion",
     32 + "sharing",
     33 + "writing",
     34 + "wiki",
     35 + "business",
     36 + "shopping",
     37 + "sport",
     38 + "books",
     39 + "news",
     40 + "documents",
     41 + "travel",
     42 + "maps",
     43 + "hobby",
     44 + "apps",
     45 + "classified",
     46 + "career",
     47 + "geosocial",
     48 + "streaming",
     49 + "education",
     50 + "networking",
     51 + "torrent",
     52 + "science",
     53 + "medicine",
     54 + "reading",
     55 + "stock",
    19 56  ]
    20 57   
    21 58   
    skipped 10 lines
    32 69   
    33 70  class MaigretSite:
    34 71   NOT_SERIALIZABLE_FIELDS = [
    35  - 'name',
    36  - 'engineData',
    37  - 'requestFuture',
    38  - 'detectedEngine',
    39  - 'engineObj',
    40  - 'stats',
    41  - 'urlRegexp',
     72 + "name",
     73 + "engineData",
     74 + "requestFuture",
     75 + "detectedEngine",
     76 + "engineObj",
     77 + "stats",
     78 + "urlRegexp",
    42 79   ]
    43 80   
    44 81   def __init__(self, name, information):
    skipped 4 lines
    49 86   self.ignore403 = False
    50 87   self.tags = []
    51 88   
    52  - self.type = 'username'
     89 + self.type = "username"
    53 90   self.headers = {}
    54 91   self.errors = {}
    55 92   self.activation = {}
    56  - self.url_subpath = ''
     93 + self.url_subpath = ""
    57 94   self.regex_check = None
    58 95   self.url_probe = None
    59  - self.check_type = ''
    60  - self.request_head_only = ''
     96 + self.check_type = ""
     97 + self.request_head_only = ""
    61 98   self.get_params = {}
    62 99   
    63 100   self.presense_strs = []
    skipped 20 lines
    84 121   return f"{self.name} ({self.url_main})"
    85 122   
    86 123   def update_detectors(self):
    87  - if 'url' in self.__dict__:
     124 + if "url" in self.__dict__:
    88 125   url = self.url
    89  - for group in ['urlMain', 'urlSubpath']:
     126 + for group in ["urlMain", "urlSubpath"]:
    90 127   if group in url:
    91  - url = url.replace('{' + group + '}', self.__dict__[CaseConverter.camel_to_snake(group)])
     128 + url = url.replace(
     129 + "{" + group + "}",
     130 + self.__dict__[CaseConverter.camel_to_snake(group)],
     131 + )
    92 132   
    93 133   self.url_regexp = URLMatcher.make_profile_url_regexp(url, self.regex_check)
    94 134   
    95  - def detect_username(self, url: str) -> str:
     135 + def detect_username(self, url: str) -> Optional[str]:
    96 136   if self.url_regexp:
    97 137   match_groups = self.url_regexp.match(url)
    98 138   if match_groups:
    99  - return match_groups.groups()[-1].rstrip('/')
     139 + return match_groups.groups()[-1].rstrip("/")
    100 140   
    101 141   return None
    102 142   
    103 143   @property
    104 144   def pretty_name(self):
    105 145   if self.source:
    106  - return f'{self.name} [{self.source}]'
     146 + return f"{self.name} [{self.source}]"
    107 147   return self.name
    108 148   
    109 149   @property
    skipped 3 lines
    113 153   # convert to camelCase
    114 154   field = CaseConverter.snake_to_camel(k)
    115 155   # strip empty elements
    116  - if v in (False, '', [], {}, None, sys.maxsize, 'username'):
     156 + if v in (False, "", [], {}, None, sys.maxsize, "username"):
    117 157   continue
    118 158   if field in self.NOT_SERIALIZABLE_FIELDS:
    119 159   continue
    skipped 1 lines
    121 161   
    122 162   return result
    123 163   
    124  - def update(self, updates: dict) -> MaigretSite:
     164 + def update(self, updates: "dict") -> "MaigretSite":
    125 165   self.__dict__.update(updates)
    126 166   self.update_detectors()
    127 167   
    128 168   return self
    129 169   
    130  - def update_from_engine(self, engine: MaigretEngine) -> MaigretSite:
     170 + def update_from_engine(self, engine: MaigretEngine) -> "MaigretSite":
    131 171   engine_data = engine.site
    132 172   for k, v in engine_data.items():
    133 173   field = CaseConverter.camel_to_snake(k)
    skipped 11 lines
    145 185   
    146 186   return self
    147 187   
    148  - def strip_engine_data(self) -> MaigretSite:
     188 + def strip_engine_data(self) -> "MaigretSite":
    149 189   if not self.engine_obj:
    150 190   return self
    151 191   
    skipped 38 lines
    190 230   def sites_dict(self):
    191 231   return {site.name: site for site in self._sites}
    192 232   
    193  - def ranked_sites_dict(self, reverse=False, top=sys.maxsize, tags=[], names=[],
    194  - disabled=True, id_type='username'):
     233 + def ranked_sites_dict(
     234 + self,
     235 + reverse=False,
     236 + top=sys.maxsize,
     237 + tags=[],
     238 + names=[],
     239 + disabled=True,
     240 + id_type="username",
     241 + ):
    195 242   """
    196  - Ranking and filtering of the sites list
     243 + Ranking and filtering of the sites list
    197 244   """
    198 245   normalized_names = list(map(str.lower, names))
    199 246   normalized_tags = list(map(str.lower, tags))
    200 247   
    201 248   is_name_ok = lambda x: x.name.lower() in normalized_names
    202 249   is_source_ok = lambda x: x.source and x.source.lower() in normalized_names
    203  - is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
     250 + is_engine_ok = (
     251 + lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
     252 + )
    204 253   is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
    205  - is_disabled_needed = lambda x: not x.disabled or ('disabled' in tags or disabled)
     254 + is_disabled_needed = lambda x: not x.disabled or (
     255 + "disabled" in tags or disabled
     256 + )
    206 257   is_id_type_ok = lambda x: x.type == id_type
    207 258   
    208 259   filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
    209 260   filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
    210 261   
    211  - filter_fun = lambda x: filter_tags_engines_fun(x) and filter_names_fun(x) \
    212  - and is_disabled_needed(x) and is_id_type_ok(x)
     262 + filter_fun = (
     263 + lambda x: filter_tags_engines_fun(x)
     264 + and filter_names_fun(x)
     265 + and is_disabled_needed(x)
     266 + and is_id_type_ok(x)
     267 + )
    213 268   
    214 269   filtered_list = [s for s in self.sites if filter_fun(s)]
    215 270   
    216  - sorted_list = sorted(filtered_list, key=lambda x: x.alexa_rank, reverse=reverse)[:top]
     271 + sorted_list = sorted(
     272 + filtered_list, key=lambda x: x.alexa_rank, reverse=reverse
     273 + )[:top]
    217 274   return {site.name: site for site in sorted_list}
    218 275   
    219 276   @property
    skipped 4 lines
    224 281   def engines_dict(self):
    225 282   return {engine.name: engine for engine in self._engines}
    226 283   
    227  - def update_site(self, site: MaigretSite) -> MaigretDatabase:
     284 + def update_site(self, site: MaigretSite) -> "MaigretDatabase":
    228 285   for s in self._sites:
    229 286   if s.name == site.name:
    230 287   s = site
    skipped 2 lines
    233 290   self._sites.append(site)
    234 291   return self
    235 292   
    236  - def save_to_file(self, filename: str) -> MaigretDatabase:
     293 + def save_to_file(self, filename: str) -> "MaigretDatabase":
    237 294   db_data = {
    238  - 'sites': {site.name: site.strip_engine_data().json for site in self._sites},
    239  - 'engines': {engine.name: engine.json for engine in self._engines},
     295 + "sites": {site.name: site.strip_engine_data().json for site in self._sites},
     296 + "engines": {engine.name: engine.json for engine in self._engines},
    240 297   }
    241 298   
    242 299   json_data = json.dumps(db_data, indent=4)
    243 300   
    244  - with open(filename, 'w') as f:
     301 + with open(filename, "w") as f:
    245 302   f.write(json_data)
    246 303   
    247 304   return self
    248 305   
    249  - def load_from_json(self, json_data: dict) -> MaigretDatabase:
     306 + def load_from_json(self, json_data: dict) -> "MaigretDatabase":
    250 307   # Add all of site information from the json file to internal site list.
    251 308   site_data = json_data.get("sites", {})
    252 309   engines_data = json_data.get("engines", {})
    skipped 5 lines
    258 315   try:
    259 316   maigret_site = MaigretSite(site_name, site_data[site_name])
    260 317   
    261  - engine = site_data[site_name].get('engine')
     318 + engine = site_data[site_name].get("engine")
    262 319   if engine:
    263 320   maigret_site.update_from_engine(self.engines_dict[engine])
    264 321   
    265 322   self._sites.append(maigret_site)
    266 323   except KeyError as error:
    267  - raise ValueError(f"Problem parsing json content for site {site_name}: "
    268  - f"Missing attribute {str(error)}."
    269  - )
     324 + raise ValueError(
     325 + f"Problem parsing json content for site {site_name}: "
     326 + f"Missing attribute {str(error)}."
     327 + )
    270 328   
    271 329   return self
    272 330   
    273  - def load_from_str(self, db_str: str) -> MaigretDatabase:
     331 + def load_from_str(self, db_str: "str") -> "MaigretDatabase":
    274 332   try:
    275 333   data = json.loads(db_str)
    276 334   except Exception as error:
    277  - raise ValueError(f"Problem parsing json contents from str"
    278  - f"'{db_str[:50]}'...: {str(error)}."
    279  - )
     335 + raise ValueError(
     336 + f"Problem parsing json contents from str"
     337 + f"'{db_str[:50]}'...: {str(error)}."
     338 + )
    280 339   
    281 340   return self.load_from_json(data)
    282 341   
    283  - def load_from_url(self, url: str) -> MaigretDatabase:
    284  - is_url_valid = url.startswith('http://') or url.startswith('https://')
     342 + def load_from_url(self, url: str) -> "MaigretDatabase":
     343 + is_url_valid = url.startswith("http://") or url.startswith("https://")
    285 344   
    286 345   if not is_url_valid:
    287 346   raise FileNotFoundError(f"Invalid data file URL '{url}'.")
    skipped 1 lines
    289 348   try:
    290 349   response = requests.get(url=url)
    291 350   except Exception as error:
    292  - raise FileNotFoundError(f"Problem while attempting to access "
    293  - f"data file URL '{url}': "
    294  - f"{str(error)}"
    295  - )
     351 + raise FileNotFoundError(
     352 + f"Problem while attempting to access "
     353 + f"data file URL '{url}': "
     354 + f"{str(error)}"
     355 + )
    296 356   
    297 357   if response.status_code == 200:
    298 358   try:
    299 359   data = response.json()
    300 360   except Exception as error:
    301  - raise ValueError(f"Problem parsing json contents at "
    302  - f"'{url}': {str(error)}."
    303  - )
     361 + raise ValueError(
     362 + f"Problem parsing json contents at " f"'{url}': {str(error)}."
     363 + )
    304 364   else:
    305  - raise FileNotFoundError(f"Bad response while accessing "
    306  - f"data file URL '{url}'."
    307  - )
     365 + raise FileNotFoundError(
     366 + f"Bad response while accessing " f"data file URL '{url}'."
     367 + )
    308 368   
    309 369   return self.load_from_json(data)
    310 370   
    311  - def load_from_file(self, filename: str) -> MaigretDatabase:
     371 + def load_from_file(self, filename: "str") -> "MaigretDatabase":
    312 372   try:
    313  - with open(filename, 'r', encoding='utf-8') as file:
     373 + with open(filename, "r", encoding="utf-8") as file:
    314 374   try:
    315 375   data = json.load(file)
    316 376   except Exception as error:
    317  - raise ValueError(f"Problem parsing json contents from "
    318  - f"file '{filename}': {str(error)}."
    319  - )
     377 + raise ValueError(
     378 + f"Problem parsing json contents from "
     379 + f"file '{filename}': {str(error)}."
     380 + )
    320 381   except FileNotFoundError as error:
    321  - raise FileNotFoundError(f"Problem while attempting to access "
    322  - f"data file '{filename}'."
    323  - )
     382 + raise FileNotFoundError(
     383 + f"Problem while attempting to access " f"data file '{filename}'."
     384 + ) from error
    324 385   
    325 386   return self.load_from_json(data)
    326 387   
    skipped 1 lines
    328 389   sites = sites_dict or self.sites_dict
    329 390   found_flags = {}
    330 391   for _, s in sites.items():
    331  - if 'presense_flag' in s.stats:
    332  - flag = s.stats['presense_flag']
     392 + if "presense_flag" in s.stats:
     393 + flag = s.stats["presense_flag"]
    333 394   found_flags[flag] = found_flags.get(flag, 0) + 1
    334 395   
    335 396   return found_flags
    skipped 2 lines
    338 399   if not sites_dict:
    339 400   sites_dict = self.sites_dict()
    340 401   
    341  - output = ''
     402 + output = ""
    342 403   disabled_count = 0
    343 404   total_count = len(sites_dict)
    344 405   urls = {}
    skipped 4 lines
    349 410   disabled_count += 1
    350 411   
    351 412   url = URLMatcher.extract_main_part(site.url)
    352  - if url.startswith('{username}'):
    353  - url = 'SUBDOMAIN'
    354  - elif url == '':
    355  - url = f'{site.url} ({site.engine})'
     413 + if url.startswith("{username}"):
     414 + url = "SUBDOMAIN"
     415 + elif url == "":
     416 + url = f"{site.url} ({site.engine})"
    356 417   else:
    357  - parts = url.split('/')
    358  - url = '/' + '/'.join(parts[1:])
     418 + parts = url.split("/")
     419 + url = "/" + "/".join(parts[1:])
    359 420   
    360 421   urls[url] = urls.get(url, 0) + 1
    361 422   
    362 423   if not site.tags:
    363  - tags['NO_TAGS'] = tags.get('NO_TAGS', 0) + 1
     424 + tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
    364 425   
    365 426   for tag in site.tags:
    366 427   if is_country_tag(tag):
    skipped 1 lines
    368 429   continue
    369 430   tags[tag] = tags.get(tag, 0) + 1
    370 431   
    371  - output += f'Enabled/total sites: {total_count - disabled_count}/{total_count}\n'
    372  - output += 'Top sites\' profile URLs:\n'
     432 + output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
     433 + output += "Top sites' profile URLs:\n"
    373 434   for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
    374 435   if count == 1:
    375 436   break
    376  - output += f'{count}\t{url}\n'
    377  - output += 'Top sites\' tags:\n'
     437 + output += f"{count}\t{url}\n"
     438 + output += "Top sites' tags:\n"
    378 439   for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True):
    379  - mark = ''
    380  - if not tag in SUPPORTED_TAGS:
    381  - mark = ' (non-standard)'
    382  - output += f'{count}\t{tag}{mark}\n'
     440 + mark = ""
     441 + if tag not in SUPPORTED_TAGS:
     442 + mark = " (non-standard)"
     443 + output += f"{count}\t{tag}{mark}\n"
    383 444   
    384 445   return output
    385 446   
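    A filtering sketch for the reformatted ranked_sites_dict(), following the same call pattern maigret.py uses above; the database path and tag are example values:

    from maigret.sites import MaigretDatabase

    db = MaigretDatabase().load_from_file("maigret/resources/data.json")
    top_coding_sites = db.ranked_sites_dict(
        top=100,              # keep the 100 best sites by Alexa rank
        tags=["coding"],      # only sites tagged "coding" (or with a matching engine)
        disabled=False,       # drop sites marked as disabled
        id_type="username",   # only username-type checks
    )
    print(len(top_coding_sites), list(top_coding_sites)[:5])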
  • ■ ■ ■ ■ ■ ■
    maigret/submit.py
     1 +import asyncio
    1 2  import difflib
     3 +import re
    2 4   
    3 5  import requests
    4 6   
    5  -from .checking import *
     7 +from .activation import import_aiohttp_cookies
     8 +from .checking import maigret
     9 +from .result import QueryStatus
     10 +from .sites import MaigretDatabase, MaigretSite
    6 11  from .utils import get_random_user_agent
    7 12   
    8 13   
    9  -DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
    10  - "birthday", "репутация", "информация", "e-mail"]
     14 +DESIRED_STRINGS = [
     15 + "username",
     16 + "not found",
     17 + "пользователь",
     18 + "profile",
     19 + "lastname",
     20 + "firstname",
     21 + "biography",
     22 + "birthday",
     23 + "репутация",
     24 + "информация",
     25 + "e-mail",
     26 +]
    11 27   
    12  -SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
     28 +SUPPOSED_USERNAMES = ["alex", "god", "admin", "red", "blue", "john"]
    13 29   
    14 30  HEADERS = {
    15  - 'User-Agent': get_random_user_agent(),
     31 + "User-Agent": get_random_user_agent(),
    16 32  }
    17 33   
    18 34  RATIO = 0.6
    19 35  TOP_FEATURES = 5
    20  -URL_RE = re.compile(r'https?://(www\.)?')
     36 +URL_RE = re.compile(r"https?://(www\.)?")
    21 37   
    22 38   
    23 39  def get_match_ratio(x):
    24  - return round(max([
    25  - difflib.SequenceMatcher(a=x.lower(), b=y).ratio()
    26  - for y in DESIRED_STRINGS
    27  - ]), 2)
     40 + return round(
     41 + max(
     42 + [difflib.SequenceMatcher(a=x.lower(), b=y).ratio() for y in DESIRED_STRINGS]
     43 + ),
     44 + 2,
     45 + )
    28 46   
    29 47   
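    Rough illustration of get_match_ratio() above: it compares the lower-cased input against every entry of DESIRED_STRINGS with difflib and keeps the best ratio, rounded to two decimals (inputs below are arbitrary examples).

    from maigret.submit import get_match_ratio

    print(get_match_ratio("Username"))    # 1.0 - exact match with "username"
    print(get_match_ratio("First-Name"))  # high, close to "firstname"
    print(get_match_ratio("<!DOCTYPE>"))  # low, nothing similar in DESIRED_STRINGS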
    30 48  def extract_mainpage_url(url):
    31  - return '/'.join(url.split('/', 3)[:3])
     49 + return "/".join(url.split("/", 3)[:3])
    32 50   
    33 51   
    34 52  async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
    35 53   changes = {
    36  - 'disabled': False,
     54 + "disabled": False,
    37 55   }
    38 56   
    39 57   check_data = [
    skipped 1 lines
    41 59   (site.username_unclaimed, QueryStatus.AVAILABLE),
    42 60   ]
    43 61   
    44  - logger.info(f'Checking {site.name}...')
     62 + logger.info(f"Checking {site.name}...")
    45 63   
    46 64   for username, status in check_data:
    47 65   results_dict = await maigret(
    skipped 10 lines
    58 76   # TODO: implement proper checking
    59 77   if site.name not in results_dict:
    60 78   logger.info(results_dict)
    61  - changes['disabled'] = True
     79 + changes["disabled"] = True
    62 80   continue
    63 81   
    64  - result = results_dict[site.name]['status']
     82 + result = results_dict[site.name]["status"]
    65 83   
    66 84   site_status = result.status
    67 85   
    skipped 2 lines
    70 88   msgs = site.absence_strs
    71 89   etype = site.check_type
    72 90   logger.warning(
    73  - f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
     91 + f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}"
     92 + )
    74 93   # don't disable in case of available username
    75 94   if status == QueryStatus.CLAIMED:
    76  - changes['disabled'] = True
     95 + changes["disabled"] = True
    77 96   elif status == QueryStatus.CLAIMED:
    78  - logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
     97 + logger.warning(
     98 + f"Not found `{username}` in {site.name}, must be claimed"
     99 + )
    79 100   logger.info(results_dict[site.name])
    80  - changes['disabled'] = True
     101 + changes["disabled"] = True
    81 102   else:
    82  - logger.warning(f'Found `{username}` in {site.name}, must be available')
     103 + logger.warning(f"Found `{username}` in {site.name}, must be available")
    83 104   logger.info(results_dict[site.name])
    84  - changes['disabled'] = True
     105 + changes["disabled"] = True
    85 106   
    86  - logger.info(f'Site {site.name} checking is finished')
     107 + logger.info(f"Site {site.name} checking is finished")
    87 108   
    88 109   return changes
    89 110   
    skipped 3 lines
    93 114   r = requests.get(url_mainpage)
    94 115   except Exception as e:
    95 116   print(e)
    96  - print('Some error while checking main page')
     117 + print("Some error while checking main page")
    97 118   return None
    98 119   
    99  - for e in db.engines:
    100  - strs_to_check = e.__dict__.get('presenseStrs')
     120 + for engine in db.engines:
     121 + strs_to_check = engine.__dict__.get("presenseStrs")
    101 122   if strs_to_check and r and r.text:
    102 123   all_strs_in_response = True
    103 124   for s in strs_to_check:
    104  - if not s in r.text:
     125 + if s not in r.text:
    105 126   all_strs_in_response = False
    106 127   if all_strs_in_response:
    107  - engine_name = e.__dict__.get('name')
    108  - print(f'Detected engine {engine_name} for site {url_mainpage}')
     128 + engine_name = engine.__dict__.get("name")
     129 + print(f"Detected engine {engine_name} for site {url_mainpage}")
    109 130   
    110 131   sites = []
    111 132   for u in SUPPOSED_USERNAMES:
    112 133   site_data = {
    113  - 'urlMain': url_mainpage,
    114  - 'name': url_mainpage.split('//')[0],
    115  - 'engine': engine_name,
    116  - 'usernameClaimed': u,
    117  - 'usernameUnclaimed': 'noonewouldeverusethis7',
     134 + "urlMain": url_mainpage,
     135 + "name": url_mainpage.split("//")[0],
     136 + "engine": engine_name,
     137 + "usernameClaimed": u,
     138 + "usernameUnclaimed": "noonewouldeverusethis7",
    118 139   }
    119 140   
    120  - maigret_site = MaigretSite(url_mainpage.split('/')[-1], site_data)
     141 + maigret_site = MaigretSite(url_mainpage.split("/")[-1], site_data)
    121 142   maigret_site.update_from_engine(db.engines_dict[engine_name])
    122 143   sites.append(maigret_site)
    123 144   
    skipped 2 lines
    126 147   return None
    127 148   
    128 149   
    129  -async def check_features_manually(db, url_exists, url_mainpage, cookie_file, logger, redirects=True):
    130  - url_parts = url_exists.split('/')
     150 +async def check_features_manually(
     151 + db, url_exists, url_mainpage, cookie_file, logger, redirects=True
     152 +):
     153 + url_parts = url_exists.split("/")
    131 154   supposed_username = url_parts[-1]
    132  - new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
     155 + new_name = input(
     156 + f'Is "{supposed_username}" a valid username? If not, write it manually: '
     157 + )
    133 158   if new_name:
    134 159   supposed_username = new_name
    135  - non_exist_username = 'noonewouldeverusethis7'
     160 + non_exist_username = "noonewouldeverusethis7"
    136 161   
    137  - url_user = url_exists.replace(supposed_username, '{username}')
     162 + url_user = url_exists.replace(supposed_username, "{username}")
    138 163   url_not_exists = url_exists.replace(supposed_username, non_exist_username)
    139 164   
    140 165   # cookies
    skipped 2 lines
    143 168   cookie_jar = await import_aiohttp_cookies(cookie_file)
    144 169   cookie_dict = {c.key: c.value for c in cookie_jar}
    145 170   
    146  - exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
     171 + exists_resp = requests.get(
     172 + url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects
     173 + )
    147 174   logger.debug(exists_resp.status_code)
    148 175   logger.debug(exists_resp.text)
    149 176   
    150  - non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
     177 + non_exists_resp = requests.get(
     178 + url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects
     179 + )
    151 180   logger.debug(non_exists_resp.status_code)
    152 181   logger.debug(non_exists_resp.text)
    153  - 
    154 182   
    155 183   a = exists_resp.text
    156 184   b = non_exists_resp.text
    skipped 5 lines
    162 190   b_minus_a = tokens_b.difference(tokens_a)
    163 191   
    164 192   if len(a_minus_b) == len(b_minus_a) == 0:
    165  - print('The pages for existing and non-existing account are the same!')
     193 + print("The pages for existing and non-existing account are the same!")
    166 194   
    167  - top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
     195 + top_features_count = int(
     196 + input(f"Specify count of features to extract [default {TOP_FEATURES}]: ")
     197 + or TOP_FEATURES
     198 + )
    168 199   
    169  - presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]
     200 + presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[
     201 + :top_features_count
     202 + ]
    170 203   
    171  - print('Detected text features of existing account: ' + ', '.join(presence_list))
    172  - features = input('If features was not detected correctly, write it manually: ')
     204 + print("Detected text features of existing account: " + ", ".join(presence_list))
     205 + features = input("If features was not detected correctly, write it manually: ")
    173 206   
    174 207   if features:
    175  - presence_list = features.split(',')
     208 + presence_list = features.split(",")
    176 209   
    177  - absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[:top_features_count]
    178  - print('Detected text features of non-existing account: ' + ', '.join(absence_list))
    179  - features = input('If features was not detected correctly, write it manually: ')
     210 + absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
     211 + :top_features_count
     212 + ]
     213 + print("Detected text features of non-existing account: " + ", ".join(absence_list))
     214 + features = input("If features was not detected correctly, write it manually: ")
    180 215   
    181 216   if features:
    182  - absence_list = features.split(',')
     217 + absence_list = features.split(",")
    183 218   
    184 219   site_data = {
    185  - 'absenceStrs': absence_list,
    186  - 'presenseStrs': presence_list,
    187  - 'url': url_user,
    188  - 'urlMain': url_mainpage,
    189  - 'usernameClaimed': supposed_username,
    190  - 'usernameUnclaimed': non_exist_username,
    191  - 'checkType': 'message',
     220 + "absenceStrs": absence_list,
     221 + "presenseStrs": presence_list,
     222 + "url": url_user,
     223 + "urlMain": url_mainpage,
     224 + "usernameClaimed": supposed_username,
     225 + "usernameUnclaimed": non_exist_username,
     226 + "checkType": "message",
    192 227   }
    193 228   
    194  - site = MaigretSite(url_mainpage.split('/')[-1], site_data)
     229 + site = MaigretSite(url_mainpage.split("/")[-1], site_data)
    195 230   return site
    196 231   
    197 232   
    198 233  async def submit_dialog(db, url_exists, cookie_file, logger):
    199  - domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
    200  - domain_raw = domain_raw.split('/')[0]
     234 + domain_raw = URL_RE.sub("", url_exists).strip().strip("/")
     235 + domain_raw = domain_raw.split("/")[0]
    201 236   
    202 237   # check for existence
    203 238   matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
    204 239   
    205 240   if matched_sites:
    206  - print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
    207  - status = lambda s: '(disabled)' if s.disabled else ''
    208  - url_block = lambda s: f'\n\t{s.url_main}\n\t{s.url}'
    209  - print('\n'.join([f'{site.name} {status(site)}{url_block(site)}' for site in matched_sites]))
     241 + print(
     242 + f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
     243 + )
     244 + status = lambda s: "(disabled)" if s.disabled else ""
     245 + url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
     246 + print(
     247 + "\n".join(
     248 + [
     249 + f"{site.name} {status(site)}{url_block(site)}"
     250 + for site in matched_sites
     251 + ]
     252 + )
     253 + )
    210 254   
    211  - if input(f'Do you want to continue? [yN] ').lower() in 'n':
     255 + if input("Do you want to continue? [yN] ").lower() in "n":
    212 256   return False
    213 257   
    214 258   url_mainpage = extract_mainpage_url(url_exists)
    215 259   
    216 260   sites = await detect_known_engine(db, url_exists, url_mainpage)
    217 261   if not sites:
    218  - print('Unable to detect site engine, lets generate checking features')
    219  - sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file, logger)]
     262 + print("Unable to detect site engine, lets generate checking features")
     263 + sites = [
     264 + await check_features_manually(
     265 + db, url_exists, url_mainpage, cookie_file, logger
     266 + )
     267 + ]
    220 268   
    221 269   logger.debug(sites[0].__dict__)
    222 270   
    skipped 4 lines
    227 275   for s in sites:
    228 276   chosen_site = s
    229 277   result = await site_self_check(s, logger, sem, db)
    230  - if not result['disabled']:
     278 + if not result["disabled"]:
    231 279   found = True
    232 280   break
    233 281   
    234 282   if not found:
    235  - print(f'Sorry, we couldn\'t find params to detect account presence/absence in {chosen_site.name}.')
    236  - print('Try to run this mode again and increase features count or choose others.')
     283 + print(
     284 + f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}."
     285 + )
     286 + print(
     287 + "Try to run this mode again and increase features count or choose others."
     288 + )
    237 289   else:
    238  - if input(f'Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y':
     290 + if (
     291 + input(
     292 + f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
     293 + ).lower()
     294 + in "y"
     295 + ):
    239 296   logger.debug(chosen_site.json)
    240 297   site_data = chosen_site.strip_engine_data()
    241 298   logger.debug(site_data.json)
    skipped 5 lines
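End to end, submit_dialog tries detect_known_engine first, falls back to check_features_manually, validates the candidate with site_self_check, and finally offers to store the new entry. A minimal driver sketch (the database path, example URL, and logger setup are assumptions for illustration; load_from_file is the loader from the sites module):

    import asyncio
    import logging

    from maigret.sites import MaigretDatabase
    from maigret.submit import submit_dialog

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("maigret-submit")

    # Load the bundled site database (adjust the path to your checkout).
    db = MaigretDatabase().load_from_file("maigret/resources/data.json")

    # Interactive: prompts for the username, self-checks the generated site
    # entry and asks whether to save it into the database.
    asyncio.run(submit_dialog(db, "https://example.com/user/alex", None, logger))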
  • ■ ■ ■ ■ ■ ■
    maigret/types.py
    skipped 3 lines
    4 4  # search query
    5 5  QueryDraft = Tuple[Callable, Any, Any]
    6 6   
    7  -# error got as a result of completed search query
    8  -class CheckError:
    9  - _type = 'Unknown'
    10  - _desc = ''
    11  - 
    12  - def __init__(self, typename, desc=''):
    13  - self._type = typename
    14  - self._desc = desc
    15  - 
    16  - def __str__(self):
    17  - if not self._desc:
    18  - return f'{self._type} error'
    19  - 
    20  - return f'{self._type} error: {self._desc}'
    21  - 
    22  - @property
    23  - def type(self):
    24  - return self._type
    25  -
    26  - @property
    27  - def desc(self):
    28  - return self._desc
    29  - 
  • ■ ■ ■ ■ ■ ■
    maigret/utils.py
    skipped 2 lines
    3 3   
    4 4   
    5 5  DEFAULT_USER_AGENTS = [
    6  - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
     6 + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
    7 7  ]
    8 8   
    9 9   
    10 10  class CaseConverter:
    11 11   @staticmethod
    12 12   def camel_to_snake(camelcased_string: str) -> str:
    13  - return re.sub(r'(?<!^)(?=[A-Z])', '_', camelcased_string).lower()
     13 + return re.sub(r"(?<!^)(?=[A-Z])", "_", camelcased_string).lower()
    14 14   
    15 15   @staticmethod
    16 16   def snake_to_camel(snakecased_string: str) -> str:
    17  - formatted = ''.join(word.title() for word in snakecased_string.split('_'))
     17 + formatted = "".join(word.title() for word in snakecased_string.split("_"))
    18 18   result = formatted[0].lower() + formatted[1:]
    19 19   return result
    20 20   
    21 21   @staticmethod
    22 22   def snake_to_title(snakecased_string: str) -> str:
    23  - words = snakecased_string.split('_')
     23 + words = snakecased_string.split("_")
    24 24   words[0] = words[0].title()
    25  - return ' '.join(words)
     25 + return " ".join(words)
    26 26   
    27 27   
    28 28  def is_country_tag(tag: str) -> bool:
    29 29   """detect if tag represent a country"""
    30  - return bool(re.match("^([a-zA-Z]){2}$", tag)) or tag == 'global'
     30 + return bool(re.match("^([a-zA-Z]){2}$", tag)) or tag == "global"
    31 31   
    32 32   
    33 33  def enrich_link_str(link: str) -> str:
    34 34   link = link.strip()
    35  - if link.startswith('www.') or (link.startswith('http') and '//' in link):
     35 + if link.startswith("www.") or (link.startswith("http") and "//" in link):
    36 36   return f'<a class="auto-link" href="{link}">{link}</a>'
    37 37   return link
    38 38   
    39 39   
    40 40  class URLMatcher:
    41  - _HTTP_URL_RE_STR = '^https?://(www.)?(.+)$'
     41 + _HTTP_URL_RE_STR = "^https?://(www.)?(.+)$"
    42 42   HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
    43  - UNSAFE_SYMBOLS = '.?'
     43 + UNSAFE_SYMBOLS = ".?"
    44 44   
    45 45   @classmethod
    46 46   def extract_main_part(self, url: str) -> str:
    47 47   match = self.HTTP_URL_RE.search(url)
    48 48   if match and match.group(2):
    49  - return match.group(2).rstrip('/')
     49 + return match.group(2).rstrip("/")
    50 50   
    51  - return ''
     51 + return ""
    52 52   
    53 53   @classmethod
    54  - def make_profile_url_regexp(self, url: str, username_regexp: str = ''):
     54 + def make_profile_url_regexp(self, url: str, username_regexp: str = ""):
    55 55   url_main_part = self.extract_main_part(url)
    56 56   for c in self.UNSAFE_SYMBOLS:
    57  - url_main_part = url_main_part.replace(c, f'\\{c}')
    58  - username_regexp = username_regexp or '.+?'
     57 + url_main_part = url_main_part.replace(c, f"\\{c}")
     58 + username_regexp = username_regexp or ".+?"
    59 59   
    60  - url_regexp = url_main_part.replace('{username}', f'({username_regexp})')
    61  - regexp_str = self._HTTP_URL_RE_STR.replace('(.+)', url_regexp)
     60 + url_regexp = url_main_part.replace("{username}", f"({username_regexp})")
     61 + regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
    62 62   
    63 63   return re.compile(regexp_str)
    64 64   
    65 65   
    66  -def get_dict_ascii_tree(items, prepend='', new_line=True):
    67  - text = ''
     66 +def get_dict_ascii_tree(items, prepend="", new_line=True):
     67 + text = ""
    68 68   for num, item in enumerate(items):
    69  - box_symbol = '┣╸' if num != len(items) - 1 else '┗╸'
     69 + box_symbol = "┣╸" if num != len(items) - 1 else "┗╸"
    70 70   
    71 71   if type(item) == tuple:
    72 72   field_name, field_value = item
    73  - if field_value.startswith('[\''):
     73 + if field_value.startswith("['"):
    74 74   is_last_item = num == len(items) - 1
    75  - prepend_symbols = ' ' * 3 if is_last_item else ''
     75 + prepend_symbols = " " * 3 if is_last_item else ""
    76 76   field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
    77  - text += f'\n{prepend}{box_symbol}{field_name}: {field_value}'
     77 + text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
    78 78   else:
    79  - text += f'\n{prepend}{box_symbol} {item}'
     79 + text += f"\n{prepend}{box_symbol} {item}"
    80 80   
    81 81   if not new_line:
    82 82   text = text[1:]
    skipped 7 lines
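Concrete calls make the helpers above easier to read (values are illustrative only):

    from maigret.utils import CaseConverter, URLMatcher

    CaseConverter.camel_to_snake("usernameClaimed")   # -> "username_claimed"
    CaseConverter.snake_to_camel("username_claimed")  # -> "usernameClaimed"
    CaseConverter.snake_to_title("check_type")        # -> "Check type"

    # Turn a profile URL template into a regexp that captures the username.
    pattern = URLMatcher.make_profile_url_regexp("https://example.com/u/{username}")
    match = pattern.match("https://www.example.com/u/soxoj")
    print(match.group(2))  # soxoj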
  • ■ ■ ■ ■ ■
    maigret.py
    skipped 15 lines
    16 16   
    17 17  if __name__ == "__main__":
    18 18   run()
     19 + 
  • ■ ■ ■ ■ ■ ■
    setup.cfg
    1 1  [egg_info]
    2 2  tag_build =
    3 3  tag_date = 0
     4 + 
     5 +[flake8]
     6 +per-file-ignores = __init__.py:F401
     7 + 
     8 +[mypy]
     9 +ignore_missing_imports = True
  • ■ ■ ■ ■ ■ ■
    test.sh
     1 +#!/bin/sh
     2 +pytest tests
     3 + 
  • ■ ■ ■ ■ ■ ■
    wizard.py
    skipped 25 lines
    26 26   # user input
    27 27   username = input('Enter username to search: ')
    28 28   
    29  - sites_count_raw = input(f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): ')
     29 + sites_count_raw = input(
     30 + f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): '
     31 + )
    30 32   sites_count = int(sites_count_raw) or TOP_SITES_COUNT
    31 33   
    32 34   sites = db.ranked_sites_dict(top=sites_count)
    skipped 1 lines
    34 36   show_progressbar_raw = input('Do you want to show a progressbar? [Yn] ')
    35 37   show_progressbar = show_progressbar_raw.lower() != 'n'
    36 38   
    37  - extract_info_raw = input('Do you want to extract additional info from accounts\' pages? [Yn] ')
     39 + extract_info_raw = input(
     40 + 'Do you want to extract additional info from accounts\' pages? [Yn] '
     41 + )
    38 42   extract_info = extract_info_raw.lower() != 'n'
    39 43   
    40  - use_notifier_raw = input('Do you want to use notifier for displaying results while searching? [Yn] ')
     44 + use_notifier_raw = input(
     45 + 'Do you want to use notifier for displaying results while searching? [Yn] '
     46 + )
    41 47   use_notifier = use_notifier_raw.lower() != 'n'
    42 48   
    43 49   notifier = None
    skipped 1 lines
    45 51   notifier = maigret.Notifier(print_found_only=True, skip_check_errors=True)
    46 52   
    47 53   # search!
    48  - search_func = maigret.search(username=username,
    49  - site_dict=sites,
    50  - timeout=TIMEOUT,
    51  - logger=logger,
    52  - max_connections=MAX_CONNECTIONS,
    53  - query_notify=notifier,
    54  - no_progressbar=(not show_progressbar),
    55  - is_parsing_enabled=extract_info,
    56  - )
     54 + search_func = maigret.search(
     55 + username=username,
     56 + site_dict=sites,
     57 + timeout=TIMEOUT,
     58 + logger=logger,
     59 + max_connections=MAX_CONNECTIONS,
     60 + query_notify=notifier,
     61 + no_progressbar=(not show_progressbar),
     62 + is_parsing_enabled=extract_info,
     63 + )
    57 64   
    58 65   results = loop.run_until_complete(search_func)
    59 66   
    skipped 6 lines
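If you want to post-process the results dict yourself, here is a minimal sketch continuing the wizard snippet above (assumption: each entry carries a 'status' QueryResult, as used in submit.py, and a 'url_user' link for found accounts):

    from maigret.result import QueryStatus

    # Print only the accounts detected as existing.
    for site_name, data in results.items():
        if data["status"].status == QueryStatus.CLAIMED:
            print(f"{site_name}: {data.get('url_user', '')}")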