Projects STRLCPY maigret Commits bfaf276f
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing completes)
  • ■ ■ ■ ■ ■
    maigret/checking.py
    skipped 18 lines
    19 19  from .result import QueryResult, QueryStatus
    20 20  from .sites import MaigretDatabase, MaigretSite
    21 21  from .types import CheckError
     22 +from .utils import get_random_user_agent
    22 23   
    23 24   
    24 25  supported_recursive_search_ids = (
    skipped 358 lines
    383 384   results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
    384 385   
    385 386   headers = {
    386  - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
     387 + 'User-Agent': get_random_user_agent(),
    387 388   }
    388 389   
    389 390   headers.update(site.headers)
    skipped 251 lines
  • ■ ■ ■ ■
    maigret/maigret.py
    skipped 274 lines
    275 275   site_data = get_top_sites_for_id(args.id_type)
    276 276   
    277 277   if args.new_site_to_submit:
    278  - is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file)
     278 + is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file, logger)
    279 279   if is_submitted:
    280 280   db.save_to_file(args.db_file)
    281 281   
    skipped 155 lines
  • ■ ■ ■ ■ ■ ■
    maigret/resources/data.json
    skipped 5435 lines
    5436 5436   },
    5437 5437   "Gitmemory": {
    5438 5438   "tags": [
    5439  - "coding",
    5440  - "github",
    5441  - "in"
     5439 + "coding"
    5442 5440   ],
    5443 5441   "checkType": "message",
    5444 5442   "absenceStrs": "Oops,404",
    5445 5443   "alexaRank": 6827,
     5444 + "source": "GitHub",
    5446 5445   "url": "https://www.gitmemory.com/{username}",
    5447 5446   "urlMain": "https://www.gitmemory.com",
    5448 5447   "usernameClaimed": "adam",
    skipped 297 lines
    5746 5745   },
    5747 5746   "Gramho": {
    5748 5747   "tags": [
    5749  - "instagram",
    5750  - "jp",
    5751 5748   "photo"
    5752 5749   ],
    5753 5750   "checkType": "status_code",
    5754 5751   "alexaRank": 4445,
     5752 + "source": "Instagram",
    5755 5753   "url": "https://gramho.com/explore-hashtag/{username}",
    5756 5754   "urlMain": "https://gramho.com/",
    5757 5755   "usernameClaimed": "adam",
    skipped 1470 lines
    7228 7226   },
    7229 7227   "Libraries": {
    7230 7228   "tags": [
    7231  - "coding",
    7232  - "github",
    7233  - "in"
     7229 + "coding"
    7234 7230   ],
    7235 7231   "regexCheck": "^[^\\.]+$",
    7236 7232   "checkType": "status_code",
    7237 7233   "alexaRank": 65552,
     7234 + "source": "GitHub",
    7238 7235   "url": "https://libraries.io/github/{username}/",
    7239 7236   "urlMain": "https://libraries.io",
    7240 7237   "usernameClaimed": "snooppr",
    skipped 2584 lines
    9825 9822   },
    9826 9823   "Picuki": {
    9827 9824   "tags": [
    9828  - "instagram",
    9829  - "photo",
    9830  - "us"
     9825 + "photo"
    9831 9826   ],
    9832 9827   "checkType": "message",
    9833 9828   "absenceStrs": [
    skipped 1888 lines
    11722 11717   },
    11723 11718   "Shutterstock": {
    11724 11719   "tags": [
    11725  - "fi",
    11726  - "us"
     11720 + "photo",
     11721 + "music",
     11722 + "stock"
    11727 11723   ],
    11728 11724   "checkType": "message",
    11729 11725   "absenceStrs": "T\u00e4m\u00e4p\u00e4 yll\u00e4tt\u00e4v\u00e4\u00e4...",
    skipped 514 lines
    12244 12240   },
    12245 12241   "Steam": {
    12246 12242   "tags": [
    12247  - "gaming",
    12248  - "steam",
    12249  - "us"
     12243 + "gaming"
    12250 12244   ],
    12251 12245   "checkType": "message",
    12252 12246   "absenceStrs": "The specified profile could not be found",
    skipped 3 lines
    12256 12250   "usernameClaimed": "blue",
    12257 12251   "usernameUnclaimed": "noonewouldeverusethis7"
    12258 12252   },
    12259  - "SteamGroup": {
     12253 + "Steam (by id)": {
     12254 + "tags": [
     12255 + "gaming"
     12256 + ],
     12257 + "type": "steam_id",
     12258 + "checkType": "message",
     12259 + "absenceStrs": "The specified profile could not be found",
     12260 + "alexaRank": 370,
     12261 + "source": "Steam",
     12262 + "url": "https://steamcommunity.com/profiles/{username}",
     12263 + "urlMain": "https://steamcommunity.com/",
     12264 + "usernameClaimed": "76561197960287930",
     12265 + "usernameUnclaimed": "noonewouldeverusethis7"
     12266 + },
     12267 + "Steam (Group)": {
    12260 12268   "tags": [
    12261  - "steam",
    12262  - "us"
     12269 + "gaming"
    12263 12270   ],
    12264 12271   "checkType": "message",
    12265 12272   "absenceStrs": "No group could be retrieved for the given URL",
    12266 12273   "alexaRank": 370,
     12274 + "source": "Steam",
    12267 12275   "url": "https://steamcommunity.com/groups/{username}",
    12268 12276   "urlMain": "https://steamcommunity.com/",
    12269 12277   "usernameClaimed": "blue",
    skipped 1 lines
    12271 12279   },
    12272 12280   "Steamid": {
    12273 12281   "tags": [
    12274  - "eg",
    12275  - "gaming",
    12276  - "steam",
    12277  - "us"
     12282 + "gaming"
    12278 12283   ],
    12279 12284   "checkType": "message",
    12280 12285   "absenceStrs": "<div class=\"alert alert-warning\">Profile not found</div>",
    12281 12286   "alexaRank": 302717,
     12287 + "source": "Steam",
    12282 12288   "url": "https://steamid.uk/profile/{username}",
    12283 12289   "urlMain": "https://steamid.uk/",
    12284 12290   "usernameClaimed": "blue",
    skipped 1 lines
    12286 12292   },
    12287 12293   "Steamid (by id)": {
    12288 12294   "tags": [
    12289  - "eg",
    12290  - "gaming",
    12291  - "steam",
    12292  - "us"
     12295 + "gaming"
    12293 12296   ],
    12294 12297   "type": "steam_id",
    12295 12298   "checkType": "message",
    12296 12299   "absenceStrs": "<div class=\"alert alert-warning\">Profile not found</div>",
    12297 12300   "alexaRank": 302717,
     12301 + "source": "Steam",
    12298 12302   "url": "https://steamid.uk/profile/{username}",
    12299 12303   "urlMain": "https://steamid.uk/",
    12300 12304   "usernameClaimed": "76561197982198022",
    skipped 1 lines
    12302 12306   },
    12303 12307   "Steamidfinder": {
    12304 12308   "tags": [
    12305  - "gaming",
    12306  - "steam",
    12307  - "us"
     12309 + "gaming"
    12308 12310   ],
    12309 12311   "checkType": "message",
    12310 12312   "presenseStrs": [
    skipped 3 lines
    12314 12316   "could not be found."
    12315 12317   ],
    12316 12318   "alexaRank": 72851,
     12319 + "source": "Steam",
    12317 12320   "url": "https://steamidfinder.com/lookup/{username}",
    12318 12321   "urlMain": "https://steamidfinder.com",
    12319 12322   "usernameClaimed": "channel",
    skipped 1 lines
    12321 12324   },
    12322 12325   "Steamidfinder (by id)": {
    12323 12326   "tags": [
    12324  - "gaming",
    12325  - "steam",
    12326  - "us"
     12327 + "gaming"
    12327 12328   ],
    12328 12329   "type": "steam_id",
    12329 12330   "checkType": "message",
    skipped 4 lines
    12334 12335   "could not be found."
    12335 12336   ],
    12336 12337   "alexaRank": 72851,
     12338 + "source": "Steam",
    12337 12339   "url": "https://steamidfinder.com/lookup/{username}",
    12338 12340   "urlMain": "https://steamidfinder.com",
    12339 12341   "usernameClaimed": "76561197982198022",
    skipped 2348 lines
    14688 14690   "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u0441\u043a\u0440\u044b\u043b \u0441\u0432\u043e\u044e \u043f\u0443\u0431\u043b\u0438\u0447\u043d\u0443\u044e \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0443"
    14689 14691   ],
    14690 14692   "alexaRank": 48,
     14693 + "source": "Yandex",
    14691 14694   "url": "https://reviews.yandex.ru/user/{username}",
    14692 14695   "urlMain": "https://yandex.ru/",
    14693 14696   "usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c",
    skipped 6 lines
    14700 14703   ],
    14701 14704   "checkType": "status_code",
    14702 14705   "alexaRank": 48,
     14706 + "source": "Yandex",
    14703 14707   "url": "https://yandex.ru/bugbounty/researchers/{username}/",
    14704 14708   "urlMain": "https://yandex.ru/bugbounty/",
    14705 14709   "usernameClaimed": "pyrk1",
    skipped 16 lines
    14722 14726   ],
    14723 14727   "absenceStrs": "cl-not-found-content__title",
    14724 14728   "alexaRank": 48,
     14729 + "source": "Yandex",
    14725 14730   "url": "https://yandex.ru/collections/user/{username}",
    14726 14731   "urlMain": "https://yandex.ru/collections/",
    14727 14732   "usernameClaimed": "yandex",
    14728 14733   "usernameUnclaimed": "noonewouldeverusethis7"
    14729 14734   },
    14730 14735   "YandexLocal": {
     14736 + "disabled": true,
    14731 14737   "tags": [
    14732 14738   "ru"
    14733 14739   ],
    14734 14740   "type": "yandex_public_id",
    14735 14741   "checkType": "status_code",
    14736 14742   "alexaRank": 48,
     14743 + "source": "Yandex",
    14737 14744   "url": "https://local.yandex.ru/users/{username}",
    14738 14745   "urlMain": "https://local.yandex.ru/",
    14739 14746   "usernameClaimed": "gp7v6ufryzw3m1nvdj4ycexa8g",
    skipped 7 lines
    14747 14754   "checkType": "message",
    14748 14755   "absenceStrs": "//yastatic.net/market-export/_/i/zero-state/404.svg",
    14749 14756   "alexaRank": 48,
     14757 + "source": "Yandex",
    14750 14758   "url": "https://market.yandex.ru/user/{username}",
    14751 14759   "urlMain": "https://market.yandex.ru/",
    14752 14760   "usernameClaimed": "6j2uh4rhp5d9gqgbynaqy2p75m",
    skipped 10 lines
    14763 14771   "urlProbe": "https://music.yandex.ru/handlers/library.jsx?owner={username}",
    14764 14772   "checkType": "status_code",
    14765 14773   "alexaRank": 48,
     14774 + "source": "Yandex",
    14766 14775   "url": "https://music.yandex.ru/users/{username}/playlists",
    14767 14776   "urlMain": "https://music.yandex.ru/",
    14768 14777   "usernameClaimed": "YandexMusic",
    skipped 16 lines
    14785 14794   "type": "yandex_public_id",
    14786 14795   "checkType": "status_code",
    14787 14796   "alexaRank": 48,
     14797 + "source": "Yandex",
    14788 14798   "url": "https://yandex.ru/q/profile/{username}",
    14789 14799   "urlMain": "https://yandex.ru/q/",
    14790 14800   "usernameClaimed": "blue",
    skipped 5 lines
    14796 14806   ],
    14797 14807   "checkType": "status_code",
    14798 14808   "alexaRank": 48,
     14809 + "source": "Yandex",
    14799 14810   "url": "https://zen.yandex.ru/{username}",
    14800 14811   "urlMain": "https://zen.yandex.ru",
    14801 14812   "usernameClaimed": "tema",
    skipped 6 lines
    14808 14819   "type": "yandex_public_id",
    14809 14820   "checkType": "status_code",
    14810 14821   "alexaRank": 48,
     14822 + "source": "Yandex",
    14811 14823   "url": "https://zen.yandex.ru/user/{username}",
    14812 14824   "urlMain": "https://zen.yandex.ru",
    14813 14825   "usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c",
    skipped 3310 lines
    18124 18136   "tracr.co": {
    18125 18137   "disabled": true,
    18126 18138   "tags": [
    18127  - "gaming",
    18128  - "discord"
     18139 + "gaming"
    18129 18140   ],
    18130 18141   "errors": {
    18131 18142   "502 - Bad Gateway": "Site error",
    skipped 2 lines
    18134 18145   "regexCheck": "^[A-Za-z0-9]{2,32}$",
    18135 18146   "checkType": "message",
    18136 18147   "absenceStrs": "No search results",
     18148 + "source": "Discord",
    18137 18149   "url": "https://tracr.co/users/1/{username}",
    18138 18150   "urlMain": "https://tracr.co/",
    18139 18151   "usernameClaimed": "blue",
    skipped 31 lines
    18171 18183   },
    18172 18184   "uID.me (by username)": {
    18173 18185   "tags": [
    18174  - "ru",
    18175  - "ucoz"
     18186 + "ru"
    18176 18187   ],
    18177 18188   "checkType": "status_code",
    18178 18189   "alexaRank": 24715,
    skipped 4 lines
    18183 18194   },
    18184 18195   "uID.me (by uguid)": {
    18185 18196   "tags": [
    18186  - "ru",
    18187  - "ucoz"
     18197 + "ru"
    18188 18198   ],
    18189 18199   "type": "uidme_uguid",
    18190 18200   "checkType": "status_code",
    skipped 4634 lines
    22825 22835   ],
    22826 22836   "engine": "engineRedirect",
    22827 22837   "alexaRank": 72,
     22838 + "source": "GitHub",
    22828 22839   "url": "https://gist.github.com/{username}",
    22829 22840   "urlMain": "https://gist.github.com",
    22830 22841   "usernameUnclaimed": "noonewouldeverusethis7",
    skipped 833 lines
    23664 23675   "usernameUnclaimed": "noonewouldeverusethis7"
    23665 23676   },
    23666 23677   "pikabu.monster": {
     23678 + "tags": [
     23679 + "ru"
     23680 + ],
    23667 23681   "checkType": "message",
    23668 23682   "presenseStrs": [
    23669 23683   "usertotalcomments",
    skipped 6 lines
    23676 23690   "url": "https://pikabu.monster/user/{username}-summary",
    23677 23691   "urlMain": "https://pikabu.monster",
    23678 23692   "usernameClaimed": "Avezenit",
     23693 + "usernameUnclaimed": "noonewouldeverusethis7"
     23694 + },
     23695 + "steamdb.info": {
     23696 + "tags": [
     23697 + "gaming"
     23698 + ],
     23699 + "type": "steam_id",
     23700 + "checkType": "message",
     23701 + "presenseStrs": [
     23702 + "profileForm",
     23703 + " player-name",
     23704 + " progress",
     23705 + " data-not-game="
     23706 + ],
     23707 + "absenceStrs": [
     23708 + "error-page",
     23709 + " Error 404"
     23710 + ],
     23711 + "source": "Steam",
     23712 + "url": "https://steamdb.info/calculator/{username}",
     23713 + "urlMain": "https://steamdb.info",
     23714 + "usernameClaimed": "76561197978866368",
     23715 + "usernameUnclaimed": "noonewouldeverusethis7"
     23716 + },
     23717 + "Niftygateway": {
     23718 + "urlProbe": "https://api.niftygateway.com/user/profile-and-offchain-nifties-by-url/?profile_url={username}",
     23719 + "checkType": "message",
     23720 + "presenseStrs": [
     23721 + "profile_url",
     23722 + "name",
     23723 + "profile_pic_url",
     23724 + "verified",
     23725 + "bio"
     23726 + ],
     23727 + "absenceStrs": [
     23728 + "not_found",
     23729 + " User profile not located in our system."
     23730 + ],
     23731 + "url": "https://niftygateway.com/profile/{username}",
     23732 + "urlMain": "https://api.niftygateway.com",
     23733 + "usernameClaimed": "admin",
     23734 + "usernameUnclaimed": "noonewouldeverusethis7"
     23735 + },
     23736 + "opensea.io": {
     23737 + "checkType": "message",
     23738 + "presenseStrs": [
     23739 + "username\\",
     23740 + "lastSale",
     23741 + "publicUsername",
     23742 + "name",
     23743 + "user"
     23744 + ],
     23745 + "absenceStrs": [
     23746 + "><div width="
     23747 + ],
     23748 + "url": "https://opensea.io/accounts/{username}",
     23749 + "urlMain": "https://opensea.io",
     23750 + "usernameClaimed": "admin",
    23679 23751   "usernameUnclaimed": "noonewouldeverusethis7"
    23680 23752   }
    23681 23753   },
    skipped 157 lines
  • ■ ■ ■ ■ ■
    maigret/sites.py
    skipped 14 lines
    15 15   'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport',
    16 16   'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified',
    17 17   'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent',
    18  - 'science', 'medicine',
     18 + 'science', 'medicine', 'reading', 'stock',
    19 19  ]
    20 20   
    21 21   
    skipped 177 lines
    199 199   normalized_tags = list(map(str.lower, tags))
    200 200   
    201 201   is_name_ok = lambda x: x.name.lower() in normalized_names
     202 + is_source_ok = lambda x: x.source and x.source.lower() in normalized_names
    202 203   is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
    203 204   is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
    204 205   is_disabled_needed = lambda x: not x.disabled or ('disabled' in tags or disabled)
    205 206   is_id_type_ok = lambda x: x.type == id_type
    206 207   
    207 208   filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
    208  - filter_names_fun = lambda x: not names or is_name_ok(x)
     209 + filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
    209 210   
    210 211   filter_fun = lambda x: filter_tags_engines_fun(x) and filter_names_fun(x) \
    211 212   and is_disabled_needed(x) and is_id_type_ok(x)
    skipped 173 lines
  • ■ ■ ■ ■ ■ ■
    maigret/submit.py
    skipped 2 lines
    3 3  import requests
    4 4   
    5 5  from .checking import *
     6 +from .utils import get_random_user_agent
    6 7   
    7 8   
    8 9  DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
    skipped 2 lines
    11 12  SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
    12 13   
    13 14  HEADERS = {
    14  - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
     15 + 'User-Agent': get_random_user_agent(),
    15 16  }
    16 17   
    17 18  RATIO = 0.6
    skipped 107 lines
    125 126   return None
    126 127   
    127 128   
    128  -async def check_features_manually(db, url_exists, url_mainpage, cookie_file, redirects=False):
     129 +async def check_features_manually(db, url_exists, url_mainpage, cookie_file, logger, redirects=True):
    129 130   url_parts = url_exists.split('/')
    130 131   supposed_username = url_parts[-1]
    131 132   new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
    skipped 11 lines
    143 144   cookie_dict = {c.key: c.value for c in cookie_jar}
    144 145   
    145 146   exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
     147 + logger.debug(exists_resp.status_code)
     148 + logger.debug(exists_resp.text)
     149 + 
    146 150   non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
     151 + logger.debug(non_exists_resp.status_code)
     152 + logger.debug(non_exists_resp.text)
     153 + 
    147 154   
    148 155   a = exists_resp.text
    149 156   b = non_exists_resp.text
    skipped 37 lines
    187 194   site = MaigretSite(url_mainpage.split('/')[-1], site_data)
    188 195   return site
    189 196   
    190  -async def submit_dialog(db, url_exists, cookie_file):
     197 + 
     198 +async def submit_dialog(db, url_exists, cookie_file, logger):
    191 199   domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
    192 200   domain_raw = domain_raw.split('/')[0]
    193 201   
    skipped 14 lines
    208 216   sites = await detect_known_engine(db, url_exists, url_mainpage)
    209 217   if not sites:
    210 218   print('Unable to detect site engine, lets generate checking features')
    211  - sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file)]
     219 + sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file, logger)]
    212 220   
    213  - print(sites[0].__dict__)
     221 + logger.debug(sites[0].__dict__)
    214 222   
    215 223   sem = asyncio.Semaphore(1)
    216  - log_level = logging.INFO
    217  - logging.basicConfig(
    218  - format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
    219  - datefmt='%H:%M:%S',
    220  - level=log_level
    221  - )
    222  - logger = logging.getLogger('site-submit')
    223  - logger.setLevel(log_level)
    224 224   
    225 225   found = False
    226 226   chosen_site = None
    skipped 9 lines
    236 236   print('Try to run this mode again and increase features count or choose others.')
    237 237   else:
    238 238   if input(f'Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y':
    239  - print(chosen_site.json)
     239 + logger.debug(chosen_site.json)
    240 240   site_data = chosen_site.strip_engine_data()
    241  - print(site_data.json)
     241 + logger.debug(site_data.json)
    242 242   db.update_site(site_data)
    243 243   return True
    244 244   
    skipped 2 lines
  • ■ ■ ■ ■ ■ ■
    maigret/utils.py
    1 1  import re
     2 +import random
     3 + 
     4 + 
     5 +DEFAULT_USER_AGENTS = [
     6 + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
     7 +]
    2 8   
    3 9   
    4 10  class CaseConverter:
    skipped 72 lines
    77 83   
    78 84   return text
    79 85   
     86 + 
     87 +def get_random_user_agent():
     88 + return random.choice(DEFAULT_USER_AGENTS)
     89 + 
Please wait...
The page encountered an error; reload to recover