maigret · Commit 0e9655c4
  • Improve extracting ids from URLs, tests

  • Soxoj committed 3 years ago
    0e9655c4
    1 parent 009d51c3
  • .gitignore
    skipped 21 lines
    22 22  # Comma-Separated Values (CSV) Reports
    23 23  *.csv
    24 24   
    25  -# Excluded sites list
    26  -tests/.excluded_sites
    27  - 
    28 25  # MacOS Folder Metadata File
    29 26  .DS_Store
    30 27  /reports/
    skipped 2 lines
    33 30  .coverage
    34 31  dist/
    35 32  htmlcov/
    36  -test_*
     33 +/test_*
  • maigret/maigret.py
    skipped 59 lines
    60 60   )
    61 61   
    62 62   
     63 +def extract_ids_from_url(url: str, db: MaigretDatabase) -> dict:
     64 +    results = {}
     65 +    for s in db.sites:
     66 +        result = s.extract_id_from_url(url)
     67 +        if not result:
     68 +            continue
     69 +        _id, _type = result
     70 +        results[_id] = _type
     71 +    return results
     72 + 
     73 + 
    63 74  def extract_ids_from_page(url, logger, timeout=5) -> dict:
    64 75      results = {}
    65 76      # url, headers
    skipped 39 lines
    105 116              ids_results[u] = utype
    106 117  
    107 118          for url in dictionary.get('ids_links', []):
    108     -            for s in db.sites:
    109     -                u = s.detect_username(url)
    110     -                if u:
    111     -                    ids_results[u] = 'username'
        119 +            ids_results.update(extract_ids_from_url(url, db))
        120 +
    112 121      return ids_results
    113 122   
    114 123   
    skipped 14 lines
    129 138      )
    130 139      parser.add_argument(
    131 140          "username",
    132     -        nargs='?',
        141 +        nargs='*',
    133 142          metavar="USERNAMES",
    134     -        action="append",
    135     -        help="One or more usernames to check with social networks.",
        143 +        help="One or more usernames to search by.",
    136 144      )
    137 145      parser.add_argument(
    138 146          "--version",
    skipped 92 lines
    231 239          help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    232 240      )
    233 241  
    234     -    filter_group = parser.add_argument_group('Site filtering', 'Options to set site search scope')
        242 +    filter_group = parser.add_argument_group(
        243 +        'Site filtering', 'Options to set site search scope'
        244 +    )
    235 245      filter_group.add_argument(
    236 246          "-a",
    237 247          "--all-sites",
    skipped 31 lines
    269 279      modes_group = parser.add_argument_group(
    270 280          'Operating modes',
    271 281          'Various functions except the default search by a username. '
    272     -        'Modes are executed sequentially in the order of declaration.'
        282 +        'Modes are executed sequentially in the order of declaration.',
    273 283      )
    274 284      modes_group.add_argument(
    275 285          "--parse",
    skipped 20 lines
    296 306          "--stats",
    297 307          action="store_true",
    298 308          default=False,
    299     -        help="Show database statistics (most frequent sites engines and tags)."
        309 +        help="Show database statistics (most frequent sites engines and tags).",
    300 310      )
    301 311   
    302     -    output_group = parser.add_argument_group('Output options', 'Options to change verbosity and view of the console output')
        312 +    output_group = parser.add_argument_group(
        313 +        'Output options', 'Options to change verbosity and view of the console output'
        314 +    )
    303 315      output_group.add_argument(
    304 316          "--print-not-found",
    305 317          action="store_true",
    skipped 48 lines
    354 366          help="Don't show progressbar.",
    355 367      )
    356 368  
    357     -    report_group = parser.add_argument_group('Report formats', 'Supported formats of report files')
        369 +    report_group = parser.add_argument_group(
        370 +        'Report formats', 'Supported formats of report files'
        371 +    )
    358 372      report_group.add_argument(
    359 373          "-T",
    360 374          "--txt",
    skipped 85 lines
    446 460          print("Using the proxy: " + args.proxy)
    447 461  
    448 462      if args.parse_url:
    449     -        extracted_ids = extract_ids_from_page(args.parse_url, logger, timeout=args.timeout)
        463 +        extracted_ids = extract_ids_from_page(
        464 +            args.parse_url, logger, timeout=args.timeout
        465 +        )
    450 466          usernames.update(extracted_ids)
    451 467  
    452 468      if args.tags:
    skipped 194 lines
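Note: the username argument change above (nargs='?' plus action="append" → nargs='*') alters what an empty invocation yields. A minimal standalone argparse sketch (a hypothetical parser, not maigret's own) of the difference:

    import argparse

    # old combination: an optional positional plus append leaves [None]
    # in the namespace when no username is passed
    old = argparse.ArgumentParser()
    old.add_argument("username", nargs='?', action="append")
    assert old.parse_args([]).username == [None]
    assert old.parse_args(["alice"]).username == ['alice']

    # new style: nargs='*' collects all positionals into one flat list,
    # empty when nothing is passed
    new = argparse.ArgumentParser()
    new.add_argument("username", nargs='*')
    assert new.parse_args([]).username == []
    assert new.parse_args(["alice", "bob"]).username == ['alice', 'bob']

This is the same default-value shift visible in tests/test_cli.py below ('username': [None] → []), and it is what lets several usernames be passed in one run.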
  • maigret/notify.py
    skipped 281 lines
    282 282          sys.stdout.write("\x1b[1K\r")
    283 283          print(notify)
    284 284  
        285 +        return notify
        286 +
    285 287      def __str__(self):
    286 288          """Convert Object To String.
    287 289  
    skipped 10 lines
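Note: update() now returns the rendered message as well as printing it, which is what the new assertions in tests/test_notify.py below rely on. A simplified sketch of the pattern (a stand-in class, not the actual QueryNotifyPrint internals):

    class Notifier:
        """Build the message, print it, and also return it
        so callers and tests can inspect the output directly."""

        def update(self, site_name: str, url: str) -> str:
            notify = f"[+] {site_name}: {url}"
            print(notify)
            return notify

    # a test can assert on the return value instead of capturing stdout
    assert Notifier().update("TEST_SITE", "http://example.com/test") == \
        "[+] TEST_SITE: http://example.com/test"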
  • maigret/resources/data.json
    skipped 14364 lines
    14365 14365        "ru"
    14366 14366      ],
    14367 14367      "checkType": "response_url",
          14368 +    "regexCheck": "^(?!id\\d)\\w*$",
    14368 14369      "alexaRank": 27,
    14369 14370      "urlMain": "https://vk.com/",
    14370 14371      "url": "https://vk.com/{username}",
    skipped 8 lines
    14379 14380      "checkType": "response_url",
    14380 14381      "alexaRank": 27,
    14381 14382      "urlMain": "https://vk.com/",
          14383 +    "regexCheck": "^\\d+$",
    14382 14384      "url": "https://vk.com/id{username}",
    14383 14385      "source": "VK",
    14384 14386      "usernameClaimed": "270433952",
    skipped 11815 lines
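Note: the two VK entries now carry complementary regexCheck patterns: the vk.com/{username} entry rejects handles that merely look like numeric profile ids, while the vk.com/id{username} entry accepts digits only. A quick standalone check (patterns copied from the diff; the JSON escape \\d is \d in the compiled regex):

    import re

    username_re = re.compile(r"^(?!id\d)\w*$")  # vk.com/{username} entry
    id_re = re.compile(r"^\d+$")                # vk.com/id{username} entry

    assert username_re.match("ida123")      # ordinary handle: allowed
    assert not username_re.match("id123")   # looks like /id<digits>: rejected
    assert id_re.match("270433952")         # pure numeric id: allowed
    assert not id_re.match("ida123")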
  • maigret/sites.py
    skipped 2 lines
    3 3  import copy
    4 4  import json
    5 5  import sys
    6  -from typing import Optional, List, Dict, Any
     6 +from typing import Optional, List, Dict, Any, Tuple
    7 7   
    8 8  import requests
    9 9   
    skipped 135 lines
    145 145              return match_groups.groups()[-1].rstrip("/")
    146 146  
    147 147          return None
        148 +
        149 +    def extract_id_from_url(self, url: str) -> Optional[Tuple[str, str]]:
        150 +        if not self.url_regexp:
        151 +            return None
        152 +
        153 +        match_groups = self.url_regexp.match(url)
        154 +        if not match_groups:
        155 +            return None
        156 +
        157 +        _id = match_groups.groups()[-1].rstrip("/")
        158 +        _type = self.type
        159 +
        160 +        return _id, _type
    148 161  
    149 162      @property
    150 163      def pretty_name(self):
    skipped 303 lines
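Note: extract_id_from_url reuses the same url_regexp that detect_username builds from the site's URL template, but returns the identifier together with the site's declared type ('username', 'vk_id', 'yandex_public_id', ...). A hedged usage sketch, assuming a database loaded from the bundled data.json:

    from maigret.sites import MaigretDatabase

    db = MaigretDatabase().load_from_file("maigret/resources/data.json")

    for site in db.sites:
        result = site.extract_id_from_url("https://vk.com/id270433952")
        if result:
            _id, _type = result
            print(site.name, _id, _type)  # expected: the numeric id, type 'vk_id'
            break

The extract_ids_from_url wrapper added in maigret/maigret.py above runs this same loop over db.sites and merges every (id, type) pair into one dict.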
  • maigret/utils.py
    skipped 54 lines
    55 55          url_main_part = self.extract_main_part(url)
    56 56          for c in self.UNSAFE_SYMBOLS:
    57 57              url_main_part = url_main_part.replace(c, f"\\{c}")
    58     -        username_regexp = username_regexp or ".+?"
        58 +        prepared_username_regexp = (username_regexp or ".+?").lstrip('^').rstrip('$')
    59 59  
    60     -        url_regexp = url_main_part.replace("{username}", f"({username_regexp})")
        60 +        url_regexp = url_main_part.replace(
        61 +            "{username}", f"({prepared_username_regexp})"
        62 +        )
    61 63          regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
    62 64  
    63 65          return re.compile(regexp_str)
    skipped 26 lines
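Note: the lstrip('^')/rstrip('$') step exists because regexCheck patterns are anchored for standalone username validation, yet here they get spliced into the middle of a larger URL regexp, where an embedded ^ or $ can never match. A condensed illustration using the VK id pattern from data.json:

    import re

    username_regexp = r"^\d+$"  # anchored validation pattern, as stored
    prepared = (username_regexp or ".+?").lstrip('^').rstrip('$')

    broken = re.compile(r"^https?://vk\.com/id(" + username_regexp + r")$")
    fixed = re.compile(r"^https?://vk\.com/id(" + prepared + r")$")

    assert broken.match("https://vk.com/id270433952") is None   # inner ^/$ kill the match
    assert fixed.match("https://vk.com/id270433952").group(1) == "270433952"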
  • tests/test_cli.py
    skipped 50 lines
    51 51      assert args == Namespace(**want_args)
    52 52  
    53 53  
        54 +def test_args_search_mode_several_usernames(argparser):
        55 +    args = argparser.parse_args('username1 username2'.split())
        56 +
        57 +    assert args.username == ['username1', 'username2']
        58 +
        59 +    want_args = dict(DEFAULT_ARGS)
        60 +    want_args.update({'username': ['username1', 'username2']})
        61 +
        62 +    assert args == Namespace(**want_args)
        63 +
        64 +
    54 65  def test_args_self_check_mode(argparser):
    55 66      args = argparser.parse_args('--self-check --site GitHub'.split())
    56 67  
    skipped 2 lines
    59 70          {
    60 71              'self_check': True,
    61 72              'site_list': ['GitHub'],
    62     -            'username': [None],
        73 +            'username': [],
    63 74          }
    64 75      )
    65 76  
    skipped 16 lines
  • tests/test_maigret.py
    skipped 4 lines
    5 5  import pytest
    6 6  from mock import Mock
    7 7   
    8  -from maigret.maigret import self_check, maigret, extract_ids_from_page, extract_ids_from_results
     8 +from maigret.maigret import self_check, maigret
     9 +from maigret.maigret import extract_ids_from_page, extract_ids_from_results, extract_ids_from_url
    9 10  from maigret.sites import MaigretSite
    10 11  from maigret.result import QueryResult, QueryStatus
    11 12   
    skipped 125 lines
    137 138      assert results == RESULTS_EXAMPLE
    138 139  
    139 140  
        141 +def test_extract_ids_from_url(default_db):
        142 +    assert extract_ids_from_url('https://www.reddit.com/user/test', default_db) == {'test': 'username'}
        143 +    assert extract_ids_from_url('https://vk.com/id123', default_db) == {'123': 'vk_id'}
        144 +    assert extract_ids_from_url('https://vk.com/ida123', default_db) == {'ida123': 'username'}
        145 +    assert extract_ids_from_url('https://my.mail.ru/yandex.ru/dipres8904/', default_db) == {'dipres8904': 'username'}
        146 +    assert extract_ids_from_url('https://reviews.yandex.ru/user/adbced123', default_db) == {'adbced123': 'yandex_public_id'}
        147 +
        148 +
    140 149  @pytest.mark.slow
    141 150  def test_extract_ids_from_page(test_db):
    142 151      logger = Mock()
    143     -    found_ids = extract_ids_from_page('https://www.reddit.com/user/test', logger)
    144     -    assert found_ids == {'test': 'username'}
        152 +    assert extract_ids_from_page('https://www.reddit.com/user/test', logger) == {'test': 'username'}
    145 153  
    146 154   
    147 155  def test_extract_ids_from_results(test_db):
    skipped 1 lines
    149 157      TEST_EXAMPLE['Reddit']['ids_usernames'] = {'test1': 'yandex_public_id'}
    150 158      TEST_EXAMPLE['Reddit']['ids_links'] = ['https://www.reddit.com/user/test2']
    151 159  
    152     -    found_ids = extract_ids_from_results(TEST_EXAMPLE, test_db)
    153     -    assert found_ids == {'test1': 'yandex_public_id', 'test2': 'username'}
        160 +    assert extract_ids_from_results(TEST_EXAMPLE, test_db) == {'test1': 'yandex_public_id', 'test2': 'username'}
    154 161  
  • tests/test_notify.py
        1 +from maigret.errors import CheckError
        2 +from maigret.notify import QueryNotifyPrint
        3 +from maigret.result import QueryStatus, QueryResult
        4 +
        5 +
        6 +def test_notify_illegal():
        7 +    n = QueryNotifyPrint(color=False)
        8 +
        9 +    assert n.update(QueryResult(
       10 +        username="test",
       11 +        status=QueryStatus.ILLEGAL,
       12 +        site_name="TEST_SITE",
       13 +        site_url_user="http://example.com/test"
       14 +    )) == "[-] TEST_SITE: Illegal Username Format For This Site!"
       15 +
       16 +
       17 +def test_notify_claimed():
       18 +    n = QueryNotifyPrint(color=False)
       19 +
       20 +    assert n.update(QueryResult(
       21 +        username="test",
       22 +        status=QueryStatus.CLAIMED,
       23 +        site_name="TEST_SITE",
       24 +        site_url_user="http://example.com/test"
       25 +    )) == "[+] TEST_SITE: http://example.com/test"
       26 +
       27 +
       28 +def test_notify_available():
       29 +    n = QueryNotifyPrint(color=False)
       30 +
       31 +    assert n.update(QueryResult(
       32 +        username="test",
       33 +        status=QueryStatus.AVAILABLE,
       34 +        site_name="TEST_SITE",
       35 +        site_url_user="http://example.com/test"
       36 +    )) == "[-] TEST_SITE: Not found!"
       37 +
       38 +
       39 +def test_notify_unknown():
       40 +    n = QueryNotifyPrint(color=False)
       41 +    result = QueryResult(
       42 +        username="test",
       43 +        status=QueryStatus.UNKNOWN,
       44 +        site_name="TEST_SITE",
       45 +        site_url_user="http://example.com/test"
       46 +    )
       47 +    result.error = CheckError('Type', 'Reason')
       48 +
       49 +    assert n.update(result) == "[?] TEST_SITE: Type error: Reason"
       50 +
  • tests/test_utils.py
    skipped 67 lines
    68 68      ]
    69 69  
    70 70      url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
        71 +    # combine parts variations
    71 72      for url_parts in itertools.product(*parts):
    72 73          url = ''.join(url_parts)
        74 +        # ensure all combinations give valid main part
    73 75          assert URLMatcher.extract_main_part(url) == url_main_part
    74 76          assert not url_regexp.match(url) is None
    75 77  
    skipped 8 lines
    84 86          ['/', ''],
    85 87      ]
    86 88  
        89 +    # combine parts variations
    87 90      for url_parts in itertools.product(*parts):
    88 91          url = ''.join(url_parts)
        92 +        # ensure all combinations match pattern
    89 93          assert (
    90 94              URLMatcher.make_profile_url_regexp(url).pattern
    91 95              == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
    skipped 39 lines