Projects STRLCPY maigret Commits 5c05cfa5
🤬
  • Fixed BongaCams, links parsing improved (#297)

    * Fixed BongaCams, links parsing improved
    
    * Fixed tests
  • Loading...
  • Soxoj committed with GitHub 3 years ago
    5c05cfa5
    1 parent 3e884d4b
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■
    maigret/resources/data.json
    skipped 2143 lines
    2144 2144   "cz",
    2145 2145   "webcam"
    2146 2146   ],
    2147  - "checkType": "status_code",
     2147 + "absenceStrs": [
     2148 + "<link rel=\"preconnect\""
     2149 + ],
     2150 + "presenseStrs": [
     2151 + "<title>Informa\u00e7\u00e3o e p\u00e1gina"
     2152 + ],
     2153 + "checkType": "message",
    2148 2154   "alexaRank": 30,
    2149 2155   "urlMain": "https://pt.bongacams.com",
    2150 2156   "url": "https://pt.bongacams.com/profile/{username}",
    2151  - "usernameClaimed": "asuna-black",
     2157 + "usernameClaimed": "Icehotangel",
    2152 2158   "usernameUnclaimed": "noonewouldeverusethis77777"
    2153 2159   },
    2154 2160   "Bookandreader": {
    skipped 10971 lines
    13126 13132   "checkType": "response_url",
    13127 13133   "alexaRank": 82345,
    13128 13134   "urlMain": "http://sprashivai.ru",
    13129  - "url": "http://sprashivai.ru/{username}?sl",
     13135 + "url": "http://sprashivai.ru/{username}",
    13130 13136   "usernameClaimed": "red",
    13131 13137   "usernameUnclaimed": "noonewouldeverusethis7"
    13132 13138   },
    skipped 15948 lines
  • ■ ■ ■ ■ ■ ■
    maigret/utils.py
    skipped 41 lines
    42 42   
    43 43   
    44 44  class URLMatcher:
    45  - _HTTP_URL_RE_STR = "^https?://(www.)?(.+)$"
     45 + _HTTP_URL_RE_STR = "^https?://(www.|m.)?(.+)$"
    46 46   HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
    47 47   UNSAFE_SYMBOLS = ".?"
    48 48   
    skipped 17 lines
    66 66   )
    67 67   regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
    68 68   
    69  - return re.compile(regexp_str)
     69 + return re.compile(regexp_str, re.IGNORECASE)
    70 70   
    71 71   
    72 72  def ascii_data_display(data: str) -> Any:
    skipped 50 lines
  • ■ ■ ■ ■
    tests/test_sites.py
    skipped 114 lines
    115 115   
    116 116   assert (
    117 117   db.sites[0].url_regexp.pattern
    118  - == r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
     118 + == r'^https?://(www.|m.)?forum\.amperka\.ru/members/\?username=(.+?)$'
    119 119   )
    120 120   assert (
    121 121   db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
    skipped 84 lines
  • ■ ■ ■ ■
    tests/test_utils.py
    skipped 97 lines
    98 98   # ensure all combinations match pattern
    99 99   assert (
    100 100   URLMatcher.make_profile_url_regexp(url).pattern
    101  - == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
     101 + == r'^https?://(www.|m.)?flickr\.com/photos/(.+?)$'
    102 102   )
    103 103   
    104 104   
    skipped 42 lines
Please wait...
Page is in error, reload to recover