Projects STRLCPY maigret Commits a30a0125
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing has completed)
  • ■ ■ ■ ■ ■ ■
    maigret/checking.py
    skipped 177 lines
    178 178   except Exception as e:
    179 179   logger.warning(f'Failed activation {method} for site {site.name}: {e}')
    180 180   
     181 + site_name = site.pretty_name
    181 182   # presense flags
    182 183   # True by default
    183 184   presense_flags = site.presense_strs
    skipped 13 lines
    197 198   if check_error:
    198 199   logger.debug(check_error)
    199 200   result = QueryResult(username,
    200  - site.name,
     201 + site_name,
    201 202   url,
    202 203   QueryStatus.UNKNOWN,
    203 204   query_time=response_time,
    skipped 7 lines
    211 212   is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
    212 213   if not is_absence_detected and is_presense_detected:
    213 214   result = QueryResult(username,
    214  - site.name,
     215 + site_name,
    215 216   url,
    216 217   QueryStatus.CLAIMED,
    217 218   query_time=response_time, tags=fulltags)
    218 219   else:
    219 220   result = QueryResult(username,
    220  - site.name,
     221 + site_name,
    221 222   url,
    222 223   QueryStatus.AVAILABLE,
    223 224   query_time=response_time, tags=fulltags)
    skipped 1 lines
    225 226   # Checks if the status code of the response is 2XX
    226 227   if (not status_code >= 300 or status_code < 200) and is_presense_detected:
    227 228   result = QueryResult(username,
    228  - site.name,
     229 + site_name,
    229 230   url,
    230 231   QueryStatus.CLAIMED,
    231 232   query_time=response_time, tags=fulltags)
    232 233   else:
    233 234   result = QueryResult(username,
    234  - site.name,
     235 + site_name,
    235 236   url,
    236 237   QueryStatus.AVAILABLE,
    237 238   query_time=response_time, tags=fulltags)
    skipped 5 lines
    243 244   # forward to some odd redirect).
    244 245   if 200 <= status_code < 300 and is_presense_detected:
    245 246   result = QueryResult(username,
    246  - site.name,
     247 + site_name,
    247 248   url,
    248 249   QueryStatus.CLAIMED,
    249 250   query_time=response_time, tags=fulltags)
    250 251   else:
    251 252   result = QueryResult(username,
    252  - site.name,
     253 + site_name,
    253 254   url,
    254 255   QueryStatus.AVAILABLE,
    255 256   query_time=response_time, tags=fulltags)
    skipped 384 lines
  • ■ ■ ■ ■ ■ ■
    maigret/resources/data.json
    skipped 9834 lines
    9835 9835   "<title>Error 404</title>"
    9836 9836   ],
    9837 9837   "alexaRank": 2076,
     9838 + "source": "Instagram",
    9838 9839   "url": "https://www.picuki.com/profile/{username}",
    9839 9840   "urlMain": "https://www.picuki.com/",
    9840 9841   "usernameClaimed": "adam",
    skipped 2310 lines
    12151 12152   "us"
    12152 12153   ],
    12153 12154   "headers": {
    12154  - "authorization": "Bearer BQAjb32z4TLh0t19LDuYfk2BV3gUXCpqyUuy2gBOyJTN_2xoZlN4AW1B6ZVmdKMDcI3Hc8agrrQsKbQZE90"
     12155 + "authorization": "Bearer BQAEeuyBT6S535Anlx4wU-pfPjjgiE8r2e7j0eOSnwZjSvjFvQgDzxwV__03-WNbwxPKyGehoJ5pQCBwUqs"
    12155 12156   },
    12156 12157   "errors": {
    12157 12158   "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
    skipped 1297 lines
    13455 13456   "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
    13456 13457   "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
    13457 13458   "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    13458  - "x-guest-token": "1386060728566681601"
     13459 + "x-guest-token": "1387733472027070474"
    13459 13460   },
    13460 13461   "errors": {
    13461 13462   "Bad guest token": "x-guest-token update required"
    skipped 370 lines
    13832 13833   "video"
    13833 13834   ],
    13834 13835   "headers": {
    13835  - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTkzMDI0NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.fN8PQIEkzQjfu7znGoIaLEP9Qr6bV8JbA2ZwpBSFI5E"
     13836 + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk2OTczNjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.yLRq0lhenTYfe0EKKJsk5HZJZt3ykUVNBGuiMCC5HR4"
    13836 13837   },
    13837 13838   "activation": {
    13838 13839   "url": "https://vimeo.com/_rv/viewer",
    skipped 9762 lines
    23601 23602   "url": "https://tapd.co/{username}",
    23602 23603   "urlMain": "https://tapd.co",
    23603 23604   "usernameClaimed": "blue",
     23605 + "usernameUnclaimed": "noonewouldeverusethis7"
     23606 + },
     23607 + "wblitz.net": {
     23608 + "checkType": "message",
     23609 + "presenseStrs": [
     23610 + "profileBlock",
     23611 + "tournaments",
     23612 + "serverna",
     23613 + " role=",
     23614 + " name="
     23615 + ],
     23616 + "absenceStrs": [
     23617 + "<html><head><title>404 \u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430</title></head><body><h2>404 \u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430</h2></body></html>"
     23618 + ],
     23619 + "url": "https://wblitz.net/stat/ru/{username}",
     23620 + "urlMain": "https://wblitz.net",
     23621 + "usernameClaimed": "lucklev12",
     23622 + "usernameUnclaimed": "noonewouldeverusethis7"
     23623 + },
     23624 + "unc.ua": {
     23625 + "checkType": "message",
     23626 + "presenseStrs": [
     23627 + "page-user_profile"
     23628 + ],
     23629 + "absenceStrs": [
     23630 + "Error Site"
     23631 + ],
     23632 + "url": "https://unc.ua/{username}",
     23633 + "urlMain": "https://unc.ua",
     23634 + "usernameClaimed": "admin",
     23635 + "usernameUnclaimed": "noonewouldeverusethis7"
     23636 + },
     23637 + "kloomba.com": {
     23638 + "checkType": "message",
     23639 + "presenseStrs": [
     23640 + "name",
     23641 + " role=",
     23642 + " main"
     23643 + ],
     23644 + "absenceStrs": [
     23645 + "error-page"
     23646 + ],
     23647 + "url": "https://kloomba.com/users/{username}",
     23648 + "urlMain": "https://kloomba.com",
     23649 + "usernameClaimed": "dima",
     23650 + "usernameUnclaimed": "noonewouldeverusethis7"
     23651 + },
     23652 + "nevrotic.net": {
     23653 + "checkType": "message",
     23654 + "presenseStrs": [
     23655 + "profile-tabs",
     23656 + " profile-rating"
     23657 + ],
     23658 + "absenceStrs": [
     23659 + "table-404"
     23660 + ],
     23661 + "url": "http://nevrotic.net/user/{username}",
     23662 + "urlMain": "http://nevrotic.net",
     23663 + "usernameClaimed": "admin",
     23664 + "usernameUnclaimed": "noonewouldeverusethis7"
     23665 + },
     23666 + "pikabu.monster": {
     23667 + "checkType": "message",
     23668 + "presenseStrs": [
     23669 + "usertotalcomments",
     23670 + " usertotalposts"
     23671 + ],
     23672 + "absenceStrs": [
     23673 + "<title>\u041e\u0448\u0438\u0431\u043a\u0430</title>"
     23674 + ],
     23675 + "source": "Pikabu",
     23676 + "url": "https://pikabu.monster/user/{username}-summary",
     23677 + "urlMain": "https://pikabu.monster",
     23678 + "usernameClaimed": "Avezenit",
    23604 23679   "usernameUnclaimed": "noonewouldeverusethis7"
    23605 23680   }
    23606 23681   },
    skipped 157 lines
  • ■ ■ ■ ■ ■ ■
    maigret/sites.py
    skipped 68 lines
    69 69   self.engine_obj = None
    70 70   self.request_future = None
    71 71   self.alexa_rank = None
     72 + self.source = None
    72 73   
    73 74   for k, v in information.items():
    74 75   self.__dict__[CaseConverter.camel_to_snake(k)] = v
    skipped 23 lines
    98 99   return match_groups.groups()[-1].rstrip('/')
    99 100   
    100 101   return None
     102 + 
     103 + @property
     104 + def pretty_name(self):
     105 + if self.source:
     106 + return f'{self.name} [{self.source}]'
     107 + return self.name
    101 108   
    102 109   @property
    103 110   def json(self):
    skipped 274 lines
  • ■ ■ ■ ■ ■
    maigret/submit.py
    skipped 9 lines
    10 10   
    11 11  SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
    12 12   
     13 +HEADERS = {
     14 + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
     15 +}
     16 + 
    13 17  RATIO = 0.6
    14 18  TOP_FEATURES = 5
    15 19  URL_RE = re.compile(r'https?://(www\.)?')
    skipped 105 lines
    121 125   return None
    122 126   
    123 127   
    124  -async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
     128 +async def check_features_manually(db, url_exists, url_mainpage, cookie_file, redirects=False):
    125 129   url_parts = url_exists.split('/')
    126 130   supposed_username = url_parts[-1]
    127 131   new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
    skipped 10 lines
    138 142   cookie_jar = await import_aiohttp_cookies(cookie_file)
    139 143   cookie_dict = {c.key: c.value for c in cookie_jar}
    140 144   
    141  - a = requests.get(url_exists, cookies=cookie_dict).text
    142  - b = requests.get(url_not_exists, cookies=cookie_dict).text
     145 + exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
     146 + non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
     147 + 
     148 + a = exists_resp.text
     149 + b = non_exists_resp.text
    143 150   
    144 151   tokens_a = set(a.split('"'))
    145 152   tokens_b = set(b.split('"'))
    146 153   
    147 154   a_minus_b = tokens_a.difference(tokens_b)
    148 155   b_minus_a = tokens_b.difference(tokens_a)
     156 + 
     157 + if len(a_minus_b) == len(b_minus_a) == 0:
     158 + print('The pages for existing and non-existing account are the same!')
    149 159   
    150 160   top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
    151 161   
    skipped 85 lines
Please wait...
Page is in error, reload to recover