Projects STRLCPY snscrape Commits c18ca0f0
🤬
  • ■ ■ ■ ■ ■ ■
    snscrape/modules/instagram.py
    skipped 95 lines
    96 96   def _check_json_callback(self, r):
    97 97   if r.status_code != 200:
    98 98   return False, f'status code {r.status_code}'
     99 + if r.url.startswith('https://www.instagram.com/accounts/login/'):
     100 + raise snscrape.base.ScraperException('Redirected to login page')
    99 101   try:
    100 102   obj = json.loads(r.text)
    101 103   except json.JSONDecodeError as e:
    skipped 141 lines
  • ■ ■ ■ ■ ■
    snscrape/modules/twitter.py
    skipped 286 lines
    287 287   def reset(self):
    288 288   super().reset()
    289 289   with self._lock:
    290  - os.remove(self._file)
     290 + _logger.info(f'Deleting guest token file {self._file}')
     291 + try:
     292 + os.remove(self._file)
     293 + except FileNotFoundError:
     294 + # Another process likely already removed the file
     295 + pass
    291 296   
    292 297   
    293 298  class _TwitterAPIType(enum.Enum):
    skipped 45 lines
    339 344   r = self._post('https://api.twitter.com/1.1/guest/activate.json', data = b'', headers = self._apiHeaders, responseOkCallback = self._check_guest_token_response)
    340 345   o = r.json()
    341 346   if not o.get('guest_token'):
    342  - raise snscrape.base.ScraperError('Unable to retrieve guest token')
     347 + raise snscrape.base.ScraperException('Unable to retrieve guest token')
    343 348   self._guestTokenManager.token = o['guest_token']
    344 349   assert self._guestTokenManager.token
    345 350   _logger.debug(f'Using guest token {self._guestTokenManager.token}')
    skipped 301 lines
    647 652   #TODO Include result['softInterventionPivot'] in the Tweet object
    648 653   result = result['tweet']
    649 654   else:
    650  - raise snscrape.base.ScraperError(f'Unknown result type {result["__typename"]!r}')
     655 + raise snscrape.base.ScraperException(f'Unknown result type {result["__typename"]!r}')
    651 656   tweet = result['legacy']
    652 657   userId = int(result['core']['user_results']['result']['rest_id'])
    653 658   user = self._user_to_user(result['core']['user_results']['result']['legacy'], id_ = userId)
    skipped 10 lines
    664 669   kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
    665 670   else:
    666 671   kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id']))
     672 + elif 'quoted_status_id_str' in tweet:
     673 + kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
    667 674   if 'card' in result:
    668 675   kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL)
    669 676   return self._make_tweet(tweet, user, **kwargs)
    skipped 437 lines
  • ■ ■ ■ ■ ■
    snscrape/modules/vkontakte.py
    skipped 176 lines
    177 177   continue
    178 178   if 'data-video' in a.attrs:
    179 179   # Video
     180 + if 'data-link-attr' in a.attrs:
     181 + hrefUrl = urllib.parse.unquote(a.attrs['data-link-attr'].split('to=')[1].split('&')[0])
     182 + else:
     183 + hrefUrl = f'https://vk.com{a["href"]}'
    180 184   video = Video(
    181 185   id = a['data-video'],
    182 186   list = a['data-list'],
    183 187   duration = int(a['data-duration']),
    184  - url = f'https://vk.com{a["href"]}',
     188 + url = hrefUrl,
    185 189   thumbUrl = a['style'][(begin := a['style'].find('background-image: url(') + 22) : a['style'].find(')', begin)],
    186 190   )
    187 191   continue
    skipped 202 lines
  • ■ ■ ■ ■
    snscrape/modules/weibo.py
    skipped 69 lines
    70 70   _logger.warning('User does not exist')
    71 71   self._user = _userDoesNotExist
    72 72   else:
    73  - raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})')
     73 + raise snscrape.base.ScraperException(f'Got unexpected response on resolving username ({r.status_code})')
    74 74   
    75 75   def _check_timeline_response(self, r):
    76 76   if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}':
    skipped 76 lines
Please wait...
Page is in error, reload to recover