Projects STRLCPY snscrape Commits 7b967ff8
🤬
  • ■ ■ ■ ■ ■
    snscrape/modules/twitter.py
    skipped 4 lines
    5 5  import logging
    6 6  import re
    7 7  import snscrape.base
     8 +import time
    8 9  import typing
    9 10  import urllib.parse
    10 11   
    skipped 87 lines
    98 99   def _get_guest_token(self):
    99 100   logger.info(f'Retrieving guest token from search page')
    100 101   r = self._get(self._baseUrl, headers = {'User-Agent': self._userAgent})
    101  - if 'gt' not in r.cookies:
    102  - raise snscrape.base.ScraperException("Twitter didn't set the cookie")
    103  - return r.cookies['gt']
     102 + match = re.search(r'document\.cookie = decodeURIComponent\("gt=(\d+); Max-Age=10800; Domain=\.twitter\.com; Path=/; Secure"\);', r.text)
     103 + if match:
     104 + logger.debug('Found guest token in HTML')
     105 + return match.group(1)
     106 + if 'gt' in r.cookies:
     107 + logger.debug('Found guest token in cookies')
     108 + return r.cookies['gt']
     109 + raise snscrape.base.ScraperException('Unable to find guest token')
    104 110   
    105 111   def _check_scroll_response(self, r):
    106 112   if r.status_code == 429:
    skipped 16 lines
    123 129   while True:
    124 130   if not guestToken:
    125 131   guestToken = self._get_guest_token()
     132 + self._session.cookies.set('gt', guestToken, domain = '.twitter.com', path = '/', secure = True, expires = time.time() + 10800)
    126 133   headers['x-guest-token'] = guestToken
    127 134   
    128 135   logger.info(f'Retrieving scroll page {cursor}')
    skipped 33 lines
    162 169   if r.status_code == 429:
    163 170   guestToken = None
    164 171   del self._session.cookies['gt']
     172 + del headers['x-guest-token']
    165 173   continue
    166 174   try:
    167 175   obj = r.json()
    skipped 208 lines
Please wait...
Page is in error, reload to recover