Projects STRLCPY snscrape Commits b13e62eb
🤬
  • ■ ■ ■ ■ ■
    snscrape/_cli.py
    skipped 132 lines
    133 133   fp.write('Stack:\n')
    134 134   for frameRecord in trace:
    135 135   fp.write(f' File "{frameRecord.filename}", line {frameRecord.lineno}, in {frameRecord.function}\n')
    136  - for line in frameRecord.code_context:
    137  - fp.write(f' {line.strip()}\n')
     136 + if frameRecord.code_context is not None:
     137 + for line in frameRecord.code_context:
     138 + fp.write(f' {line.strip()}\n')
    138 139   fp.write('\n')
    139 140   
    140  - for frameRecord in trace:
    141  - module = inspect.getmodule(frameRecord[0])
     141 + modules = [inspect.getmodule(frameRecord[0]) for frameRecord in trace]
     142 + for i, (module, frameRecord) in enumerate(zip(modules, trace)):
     143 + if module is None:
     144 + # Module-less frame, e.g. dataclass.__init__
     145 + for j in reversed(range(i)):
     146 + if modules[j] is not None:
     147 + break
     148 + else:
     149 + # No previous module scope
     150 + continue
     151 + module = modules[j]
    142 152   if not module.__name__.startswith('snscrape.') and module.__name__ != 'snscrape':
    143 153   continue
    144 154   locals_ = frameRecord[0].f_locals
    skipped 185 lines
  • ■ ■ ■ ■ ■ ■
    snscrape/base.py
    skipped 162 lines
    163 163   return self._get_entity()
    164 164   
    165 165   def _request(self, method, url, params = None, data = None, headers = None, timeout = 10, responseOkCallback = None, allowRedirects = True, proxies = None):
    166  - proxies = proxies or self._proxies
     166 + proxies = proxies or self._proxies or {}
    167 167   for attempt in range(self._retries + 1):
    168 168   # The request is newly prepared on each retry because of potential cookie updates.
    169 169   req = self._session.prepare_request(requests.Request(method, url, params = params, data = data, headers = headers))
     170 + environmentSettings = self._session.merge_environment_settings(req.url, proxies, None, None, None)
    170 171   logger.info(f'Retrieving {req.url}')
    171 172   logger.debug(f'... with headers: {headers!r}')
    172 173   if data:
    173 174   logger.debug(f'... with data: {data!r}')
     175 + if environmentSettings:
     176 + logger.debug(f'... with environmentSettings: {environmentSettings!r}')
    174 177   try:
    175  - r = self._session.send(req, allow_redirects = allowRedirects, timeout = timeout, proxies = proxies)
     178 + r = self._session.send(req, allow_redirects = allowRedirects, timeout = timeout, **environmentSettings)
    176 179   except requests.exceptions.RequestException as exc:
    177 180   if attempt < self._retries:
    178 181   retrying = ', retrying'
    skipped 66 lines
  • ■ ■ ■ ■ ■ ■
    snscrape/modules/twitter.py
    skipped 98 lines
    99 99  class Video(Medium):
    100 100   thumbnailUrl: str
    101 101   variants: typing.List[VideoVariant]
    102  - duration: float
     102 + duration: typing.Optional[float] = None
    103 103   views: typing.Optional[int] = None
    104 104   
    105 105   
    skipped 26 lines
    132 132   countryCode: str
    133 133   
    134 134   
class Card:
	'''Common base class of all card types; it declares no fields of its own.'''

	pass
     137 + 
     138 + 
@dataclasses.dataclass
class SummaryCard(Card):
	'''Link preview card.

	_make_card builds this for the 'summary' and 'summary_large_image' card names.
	'''

	title: str
	url: str  # taken from the 'card_url' binding value
	description: typing.Optional[str] = None
	thumbnailUrl: typing.Optional[str] = None  # 'thumbnail_image_original' binding value
	siteUser: typing.Optional['User'] = None  # 'site' binding value
	creatorUser: typing.Optional['User'] = None  # 'creator' binding value
     147 + 
     148 + 
@dataclasses.dataclass
class AppCard(SummaryCard):
	'''Summary-style card for an app; adds no fields to SummaryCard.

	_make_card builds this for the 'app' and 'direct_store_link_app' card names,
	taking thumbnailUrl from the 'thumbnail_original' binding value.
	'''

	pass
     152 + 
     153 + 
@dataclasses.dataclass
class PollCard(Card):
	'''Poll card.

	_make_card builds this for the poll{2,3,4}choice_{text_only,image,video} card names.
	'''

	options: typing.List['PollOption']
	endDate: datetime.datetime  # 'end_datetime_utc', parsed into a UTC-aware datetime
	duration: int  # 'duration_minutes' converted to int
	finalResults: bool  # 'counts_are_final'
	lastUpdateDate: typing.Optional[datetime.datetime] = None  # 'last_updated_datetime_utc'
	medium: typing.Optional[Medium] = None  # Photo for *_image polls, Video for *_video polls
     162 + 
     163 + 
@dataclasses.dataclass
class PollOption:
	'''One choice of a PollCard.'''

	label: str  # 'choiceN_label' binding value
	count: typing.Optional[int] = None  # 'choiceN_count' as int; left as None when that value is absent or falsy
     168 + 
     169 + 
@dataclasses.dataclass
class PlayerCard(Card):
	'''Card built by _make_card for the 'player' card name.'''

	title: str
	url: str  # 'card_url' binding value
	description: typing.Optional[str] = None
	imageUrl: typing.Optional[str] = None  # 'player_image_original' binding value
	siteUser: typing.Optional['User'] = None  # 'site' binding value
     177 + 
     178 + 
@dataclasses.dataclass
class PromoConvoCard(Card):
	'''Card built by _make_card for the 'promo_image_convo' and 'promo_video_convo' card names.'''

	actions: typing.List['PromoConvoAction']  # one per present 'cta_one' .. 'cta_four' binding value
	thankYouText: str  # 'thank_you_text'
	medium: Medium  # Photo for the image variant, Video for the video variant
	thankYouUrl: typing.Optional[str] = None  # 'thank_you_url'
	thankYouTcoUrl: typing.Optional[str] = None  # 'thank_you_shortened_url'
	cover: typing.Optional['Photo'] = None  # only set for the image variant when 'cover_promo_image' is present
     187 + 
     188 + 
@dataclasses.dataclass
class PromoConvoAction:
	'''One call-to-action of a PromoConvoCard.'''

	label: str  # 'cta_<n>' binding value
	tweet: str  # 'cta_<n>_tweet' binding value
     193 + 
     194 + 
@dataclasses.dataclass
class BroadcastCard(Card):
	'''Card built by _make_card for the '745291183405076480:broadcast' card name.'''

	id: str  # 'broadcast_id'
	url: str  # 'broadcast_url'
	title: str  # 'broadcast_title'
	state: typing.Optional[str] = None  # 'broadcast_state'
	broadcaster: typing.Optional['User'] = None  # built from the 'broadcaster_*' binding values when 'broadcaster_twitter_id' is present
	thumbnailUrl: typing.Optional[str] = None  # 'broadcast_thumbnail_original'
	source: typing.Optional[str] = None  # 'broadcast_source'
	siteUser: typing.Optional['User'] = None  # 'site'
     205 + 
     206 + 
@dataclasses.dataclass
class PeriscopeBroadcastCard(Card):
	'''Card built by _make_card for the '3691233323:periscope_broadcast' card name.'''

	id: str
	url: str
	title: str
	description: str
	state: str  # 'broadcast_state'
	totalParticipants: int  # 'total_participants' converted to int
	thumbnailUrl: str  # 'full_size_thumbnail_url'
	source: typing.Optional[str] = None  # 'broadcast_source'
	broadcaster: typing.Optional['User'] = None  # built from the 'broadcaster_*' binding values when 'broadcaster_twitter_id' is present
	siteUser: typing.Optional['User'] = None  # 'site'
     219 + 
     220 + 
@dataclasses.dataclass
class EventCard(Card):
	'''Card wrapping an Event; built by _make_card for the '745291183405076480:live_event' card name.'''

	event: 'Event'
     224 + 
     225 + 
@dataclasses.dataclass
class Event:
	'''Event referenced by an EventCard.'''

	id: int  # 'event_id' converted to int
	category: str  # 'event_category'
	photo: Photo  # preview from 'event_thumbnail_small'; full from 'event_thumbnail_original', else 'event_thumbnail'
	title: typing.Optional[str] = None  # 'event_title'
	description: typing.Optional[str] = None  # 'event_subtitle'

	@property
	def url(self):
		'''The twitter.com/i/events URL derived from id.'''
		return f'https://twitter.com/i/events/{self.id}'
     237 + 
     238 + 
@dataclasses.dataclass
class NewsletterCard(Card):
	'''Card built by _make_card for the '3337203208:newsletter_publication' card name.'''

	title: str  # 'newsletter_title'
	description: str  # 'newsletter_description'
	imageUrl: str  # 'newsletter_image_original'
	url: str  # 'card_url'
	revueAccountId: int  # 'revue_account_id' converted to int
	issueCount: int  # 'issue_count' converted to int
     247 + 
     248 + 
@dataclasses.dataclass
class NewsletterIssueCard(Card):
	'''Card built by _make_card for the '3337203208:newsletter_issue' card name.'''

	newsletterTitle: str  # 'newsletter_title'
	newsletterDescription: str  # 'newsletter_description'
	issueTitle: str  # 'issue_title'
	issueNumber: int  # 'issue_number' converted to int
	url: str  # 'card_url'
	revueAccountId: int  # 'revue_account_id' converted to int
	issueDescription: typing.Optional[str] = None  # 'issue_description'
	imageUrl: typing.Optional[str] = None  # 'issue_image_original'
     259 + 
     260 + 
@dataclasses.dataclass
class AmplifyCard(Card):
	'''Card built by _make_card for the 'amplify' card name.'''

	id: str  # 'amplify_content_id'
	video: Video  # single variant pointing at 'amplify_url_vmap'; constructed without a duration
     265 + 
     266 + 
@dataclasses.dataclass
class AppPlayerCard(Card):
	'''Card built by _make_card for the 'appplayer' card name.'''

	title: str
	video: Video  # variants from 'player_hls_url' and 'player_url', thumbnail from 'player_image_original'
	appCategory: str  # 'app_category'
	playerOwnerId: int  # 'player_owner_id' converted to int
	siteUser: typing.Optional['User'] = None  # 'site'
     274 + 
     275 + 
@dataclasses.dataclass
class SpacesCard(Card):
	'''Card built by _make_card for the '3691233323:audiospace' card name.'''

	url: str  # 'card_url'
	id: str  # 'id'
     280 + 
     281 + 
@dataclasses.dataclass
class MessageMeCard(Card):
	'''Card built by _make_card for the '2586390716:message_me' card name.'''

	recipient: 'User'  # 'recipient' binding value
	url: str  # 'card_url'
	buttonText: str  # text looked up from the 'cta' binding value; cards with an unmapped cta are not constructed
     287 + 
     288 + 
# Aliases naming the kinds of string keys used to cross-reference the parts of a UnifiedCard.
# They are all plain str; the distinct names only document which dictionary a key indexes.
UnifiedCardComponentKey = str
UnifiedCardDestinationKey = str
UnifiedCardMediumKey = str
UnifiedCardAppKey = str
     293 + 
     294 + 
     295 +@dataclasses.dataclass
     296 +class UnifiedCard(Card):
     297 + componentObjects: typing.Dict[UnifiedCardComponentKey, 'UnifiedCardComponentObject']
     298 + destinations: typing.Dict[UnifiedCardDestinationKey, 'UnifiedCardDestination']
     299 + media: typing.Dict[UnifiedCardMediumKey, Medium]
     300 + apps: typing.Optional[typing.Dict[UnifiedCardAppKey, typing.List['UnifiedCardApp']]] = None
     301 + components: typing.Optional[typing.List[UnifiedCardComponentKey]] = None
     302 + swipeableLayoutSlides: typing.Optional[typing.List['UnifiedCardSwipeableLayoutSlide']] = None
     303 + type: typing.Optional[str] = None
     304 + 
     305 + def __post_init__(self):
     306 + if (self.components is None) == (self.swipeableLayoutSlides is None):
     307 + raise ValueError('did not get exactly one of components or swipeableLayoutSlides')
     308 + if self.components and not all(k in self.componentObjects for k in self.components):
     309 + raise ValueError('missing components')
     310 + if self.swipeableLayoutSlides and not all(s.mediumComponentKey in self.componentObjects and s.componentKey in self.componentObjects for s in self.swipeableLayoutSlides):
     311 + raise ValueError('missing components')
     312 + if any(c.destinationKey not in self.destinations for c in self.componentObjects.values() if hasattr(c, 'destinationKey')):
     313 + raise ValueError('missing destinations')
     314 + if any(b.destinationKey not in self.destinations for c in self.componentObjects.values() if isinstance(c, UnifiedCardButtonGroupComponentObject) for b in c.buttons):
     315 + raise ValueError('missing destinations')
     316 + mediaKeys = []
     317 + for c in self.componentObjects.values():
     318 + if isinstance(c, UnifiedCardMediumComponentObject):
     319 + mediaKeys.append(c.mediumKey)
     320 + elif isinstance(c, UnifiedCardSwipeableMediaComponentObject):
     321 + mediaKeys.extend(x.mediumKey for x in c.media)
     322 + mediaKeys.extend(d.mediumKey for d in self.destinations.values() if d.mediumKey is not None)
     323 + mediaKeys.extend(a.iconMediumKey for l in (self.apps.values() if self.apps is not None else []) for a in l if a.iconMediumKey is not None)
     324 + if any(k not in self.media for k in mediaKeys):
     325 + raise ValueError('missing media')
     326 + if any(c.appKey not in self.apps for c in self.componentObjects.values() if hasattr(c, 'appKey')):
     327 + raise ValueError('missing apps')
     328 + if any(d.appKey not in self.apps for d in self.destinations.values() if d.appKey is not None):
     329 + raise ValueError('missing apps')
     330 + 
     331 + 
class UnifiedCardComponentObject:
	'''Common base class of the values stored in UnifiedCard.componentObjects; it declares no fields.'''

	pass
     334 + 
     335 + 
@dataclasses.dataclass
class UnifiedCardDetailComponentObject(UnifiedCardComponentObject):
	'''Unified card component carrying a piece of text content and a destination.'''

	content: str
	destinationKey: UnifiedCardDestinationKey  # must be a key of the owning UnifiedCard.destinations
     340 + 
     341 + 
@dataclasses.dataclass
class UnifiedCardMediumComponentObject(UnifiedCardComponentObject):
	'''Unified card component referencing a single medium and a destination.'''

	mediumKey: UnifiedCardMediumKey  # must be a key of the owning UnifiedCard.media
	destinationKey: UnifiedCardDestinationKey  # must be a key of the owning UnifiedCard.destinations
     346 + 
     347 + 
@dataclasses.dataclass
class UnifiedCardButtonGroupComponentObject(UnifiedCardComponentObject):
	'''Unified card component holding a list of buttons.'''

	buttons: typing.List['UnifiedCardButton']  # each button's destinationKey is validated by UnifiedCard.__post_init__
     351 + 
     352 + 
@dataclasses.dataclass
class UnifiedCardButton:
	'''One button of a UnifiedCardButtonGroupComponentObject.'''

	text: str
	destinationKey: UnifiedCardDestinationKey  # must be a key of the owning UnifiedCard.destinations
     357 + 
     358 + 
@dataclasses.dataclass
class UnifiedCardSwipeableMediaComponentObject(UnifiedCardComponentObject):
	'''Unified card component holding a list of media entries.'''

	media: typing.List['UnifiedCardSwipeableMediaMedium']  # each entry's mediumKey is validated by UnifiedCard.__post_init__
     362 + 
     363 + 
@dataclasses.dataclass
class UnifiedCardSwipeableMediaMedium:
	'''One entry of a UnifiedCardSwipeableMediaComponentObject.'''

	mediumKey: UnifiedCardMediumKey  # must be a key of the owning UnifiedCard.media
	# NOTE(review): UnifiedCard.__post_init__ does not appear to validate this key against destinations — confirm whether that is intended.
	destinationKey: UnifiedCardDestinationKey
     368 + 
     369 + 
@dataclasses.dataclass
class UnifiedCardAppStoreComponentObject(UnifiedCardComponentObject):
	'''Unified card component referencing an app entry and a destination.'''

	appKey: UnifiedCardAppKey  # must be a key of the owning UnifiedCard.apps
	destinationKey: UnifiedCardDestinationKey  # must be a key of the owning UnifiedCard.destinations
     374 + 
     375 + 
@dataclasses.dataclass
class UnifiedCardTwitterListDetailsComponentObject(UnifiedCardComponentObject):
	'''Unified card component describing a Twitter list.'''

	name: str
	memberCount: int
	subscriberCount: int
	user: 'User'
	destinationKey: UnifiedCardDestinationKey  # must be a key of the owning UnifiedCard.destinations
     383 + 
     384 + 
@dataclasses.dataclass
class UnifiedCardTwitterCommunityDetailsComponentObject(UnifiedCardComponentObject):
	'''Unified card component describing a Twitter community.'''

	name: str
	theme: str
	membersCount: int
	destinationKey: UnifiedCardDestinationKey  # must be a key of the owning UnifiedCard.destinations
	membersFacepile: typing.Optional[typing.List['User']] = None
     392 + 
     393 + 
     394 +@dataclasses.dataclass
     395 +class UnifiedCardDestination:
     396 + url: typing.Optional[str] = None
     397 + appKey: typing.Optional[UnifiedCardAppKey] = None
     398 + mediumKey: typing.Optional[UnifiedCardMediumKey] = None
     399 + 
     400 + def __post_init__(self):
     401 + if (self.url is None) == (self.appKey is None):
     402 + raise ValueError('did not get exactly one of url and appKey')
     403 + 
     404 + 
@dataclasses.dataclass
class UnifiedCardApp:
	'''One app entry of a UnifiedCard; UnifiedCard.apps maps each app key to a list of these.'''

	type: str
	id: str
	title: str
	category: str
	countryCode: str
	url: str
	description: typing.Optional[str] = None
	iconMediumKey: typing.Optional[UnifiedCardMediumKey] = None  # if set, must be a key of the owning UnifiedCard.media
	size: typing.Optional[int] = None
	installs: typing.Optional[int] = None
	ratingAverage: typing.Optional[float] = None
	ratingCount: typing.Optional[int] = None
	isFree: typing.Optional[bool] = None
	isEditorsChoice: typing.Optional[bool] = None
	hasInAppPurchases: typing.Optional[bool] = None
	hasInAppAds: typing.Optional[bool] = None
     423 + 
     424 + 
@dataclasses.dataclass
class UnifiedCardSwipeableLayoutSlide:
	'''One slide of UnifiedCard.swipeableLayoutSlides; both keys must exist in UnifiedCard.componentObjects.'''

	mediumComponentKey: UnifiedCardComponentKey
	componentKey: UnifiedCardComponentKey
    141 429   
    142 430   
    143 431  @dataclasses.dataclass
    skipped 46 lines
    190 478   url: typing.Optional[str] = None
    191 479   badgeUrl: typing.Optional[str] = None
    192 480   longDescription: typing.Optional[str] = None
     481 + 
     482 + 
@dataclasses.dataclass
class UserRef:
	'''Reference to a user of whom only the numeric ID is known.

	_make_card creates these for GraphQL card user_refs entries that carry no 'legacy' data.
	'''

	id: int
    193 486   
    194 487   
    195 488  @dataclasses.dataclass
    skipped 314 lines
    510 803   raise snscrape.base.ScraperException(f'Unable to handle entry {entryId!r}')
    511 804   yield self._tweet_to_tweet(tweet, obj)
    512 805   
     806 + def _get_tweet_id(self, tweet):
     807 + return tweet['id'] if 'id' in tweet else int(tweet['id_str'])
     808 + 
    513 809   def _make_tweet(self, tweet, user, retweetedTweet = None, quotedTweet = None, card = None):
     810 + tweetId = self._get_tweet_id(tweet)
    514 811   kwargs = {}
    515  - kwargs['id'] = tweet['id'] if 'id' in tweet else int(tweet['id_str'])
     812 + kwargs['id'] = tweetId
    516 813   kwargs['content'] = tweet['full_text']
    517 814   kwargs['renderedContent'] = self._render_text_with_urls(tweet['full_text'], tweet['entities'].get('urls'))
    518 815   kwargs['user'] = user
    skipped 1 lines
    520 817   if tweet['entities'].get('urls'):
    521 818   kwargs['outlinks'] = [u['expanded_url'] for u in tweet['entities']['urls']]
    522 819   kwargs['tcooutlinks'] = [u['url'] for u in tweet['entities']['urls']]
    523  - kwargs['url'] = f'https://twitter.com/{user.username}/status/{kwargs["id"]}'
     820 + kwargs['url'] = f'https://twitter.com/{user.username}/status/{tweetId}'
    524 821   kwargs['replyCount'] = tweet['reply_count']
    525 822   kwargs['retweetCount'] = tweet['retweet_count']
    526 823   kwargs['likeCount'] = tweet['favorite_count']
    skipped 8 lines
    535 832   if 'extended_entities' in tweet and 'media' in tweet['extended_entities']:
    536 833   media = []
    537 834   for medium in tweet['extended_entities']['media']:
    538  - if medium['type'] == 'photo':
    539  - if '.' not in medium['media_url_https']:
    540  - _logger.warning(f'Skipping malformed medium URL on tweet {kwargs["id"]}: {medium["media_url_https"]!r} contains no dot')
    541  - continue
    542  - baseUrl, format = medium['media_url_https'].rsplit('.', 1)
    543  - if format not in ('jpg', 'png'):
    544  - _logger.warning(f'Skipping photo with unknown format on tweet {kwargs["id"]}: {format!r}')
    545  - continue
    546  - media.append(Photo(
    547  - previewUrl = f'{baseUrl}?format={format}&name=small',
    548  - fullUrl = f'{baseUrl}?format={format}&name=large',
    549  - ))
    550  - elif medium['type'] == 'video' or medium['type'] == 'animated_gif':
    551  - variants = []
    552  - for variant in medium['video_info']['variants']:
    553  - variants.append(VideoVariant(contentType = variant['content_type'], url = variant['url'], bitrate = variant.get('bitrate')))
    554  - mKwargs = {
    555  - 'thumbnailUrl': medium['media_url_https'],
    556  - 'variants': variants,
    557  - }
    558  - if medium['type'] == 'video':
    559  - mKwargs['duration'] = medium['video_info']['duration_millis'] / 1000
    560  - if (ext := medium.get('ext')) and (mediaStats := ext['mediaStats']) and isinstance(r := mediaStats['r'], dict) and 'ok' in r and isinstance(r['ok'], dict):
    561  - mKwargs['views'] = int(r['ok']['viewCount'])
    562  - elif (mediaStats := medium.get('mediaStats')):
    563  - mKwargs['views'] = mediaStats['viewCount']
    564  - cls = Video
    565  - elif medium['type'] == 'animated_gif':
    566  - cls = Gif
    567  - media.append(cls(**mKwargs))
     835 + if (mediumO := self._make_medium(medium, tweetId)):
     836 + media.append(mediumO)
    568 837   if media:
    569 838   kwargs['media'] = media
    570 839   if retweetedTweet:
    skipped 34 lines
    605 874   kwargs['cashtags'] = [o['text'] for o in tweet['entities']['symbols']]
    606 875   if card:
    607 876   kwargs['card'] = card
     877 +<<<<<<< HEAD
    608 878   # Try to convert the URL to the non-shortened/t.co one
    609 879   try:
    610 880   i = kwargs['tcooutlinks'].index(card.url)
    skipped 4 lines
    615 885   pass
    616 886   else:
    617 887   card.url = kwargs['outlinks'][i]
     888 +=======
     889 + if hasattr(card, 'url') and '//t.co/' in card.url:
     890 + # Try to convert the URL to the non-shortened/t.co one
     891 + # Retweets inherit the card but not the outlinks; try to get them from the retweeted tweet instead in that case.
     892 + if 'tcooutlinks' in kwargs and card.url in kwargs['tcooutlinks']:
     893 + card.url = kwargs['outlinks'][kwargs['tcooutlinks'].index(card.url)]
     894 + elif retweetedTweet and retweetedTweet.tcooutlinks and card.url in retweetedTweet.tcooutlinks:
     895 + card.url = retweetedTweet.outlinks[retweetedTweet.tcooutlinks.index(card.url)]
     896 + else:
     897 + _logger.warning(f'Could not translate t.co card URL on tweet {tweetId}')
     898 +>>>>>>> ed3ea944d177157d688786470c8369198b0ce8ce
    618 899   return Tweet(**kwargs)
    619 900   
    620  - def _make_card(self, card, apiType):
    621  - cardKwargs = {}
    622  - for key, kwarg in [('title', 'title'), ('description', 'description'), ('card_url', 'url'), ('thumbnail_image_original', 'thumbnailUrl')]:
    623  - if apiType is _TwitterAPIType.V2:
    624  - value = card['binding_values'].get(key)
    625  - elif apiType is _TwitterAPIType.GRAPHQL:
    626  - value = next((o['value'] for o in card['legacy']['binding_values'] if o['key'] == key), None)
    627  - if not value:
     901 + def _make_medium(self, medium, tweetId):
     902 + if medium['type'] == 'photo':
     903 + if '?format=' in medium['media_url_https'] or '&format=' in medium['media_url_https']:
     904 + return Photo(previewUrl = medium['media_url_https'], fullUrl = medium['media_url_https'])
     905 + if '.' not in medium['media_url_https']:
     906 + _logger.warning(f'Skipping malformed medium URL on tweet {tweetId}: {medium["media_url_https"]!r} contains no dot')
     907 + return
     908 + baseUrl, format = medium['media_url_https'].rsplit('.', 1)
     909 + if format not in ('jpg', 'png'):
     910 + _logger.warning(f'Skipping photo with unknown format on tweet {tweetId}: {format!r}')
     911 + return
     912 + return Photo(
     913 + previewUrl = f'{baseUrl}?format={format}&name=small',
     914 + fullUrl = f'{baseUrl}?format={format}&name=large',
     915 + )
     916 + elif medium['type'] == 'video' or medium['type'] == 'animated_gif':
     917 + variants = []
     918 + for variant in medium['video_info']['variants']:
     919 + variants.append(VideoVariant(contentType = variant['content_type'], url = variant['url'], bitrate = variant.get('bitrate')))
     920 + mKwargs = {
     921 + 'thumbnailUrl': medium['media_url_https'],
     922 + 'variants': variants,
     923 + }
     924 + if medium['type'] == 'video':
     925 + mKwargs['duration'] = medium['video_info']['duration_millis'] / 1000
     926 + if (ext := medium.get('ext')) and (mediaStats := ext.get('mediaStats')) and isinstance(r := mediaStats['r'], dict) and 'ok' in r and isinstance(r['ok'], dict):
     927 + mKwargs['views'] = int(r['ok']['viewCount'])
     928 + elif (mediaStats := medium.get('mediaStats')):
     929 + mKwargs['views'] = mediaStats['viewCount']
     930 + cls = Video
     931 + elif medium['type'] == 'animated_gif':
     932 + cls = Gif
     933 + return cls(**mKwargs)
     934 + else:
     935 + _logger.warning(f'Unsupported medium type on tweet {tweetId}: {medium["type"]!r}')
     936 + 
     937 + def _make_card(self, card, apiType, tweetId):
     938 + bindingValues = {}
     939 + 
     940 + def _kwargs_from_map(keyKwargMap):
     941 + nonlocal bindingValues
     942 + return {kwarg: bindingValues[key] for key, kwarg in keyKwargMap.items() if key in bindingValues}
     943 + 
     944 + userRefs = {}
     945 + if apiType is _TwitterAPIType.V2:
     946 + for o in card.get('users', {}).values():
     947 + userId = o['id']
     948 + assert userId not in userRefs
     949 + userRefs[userId] = self._user_to_user(o)
     950 + elif apiType is _TwitterAPIType.GRAPHQL:
     951 + for o in card['legacy'].get('user_refs', {}):
     952 + userId = int(o['rest_id'])
     953 + if userId in userRefs:
     954 + _logger.warning(f'Duplicate user {userId} in card on tweet {tweetId}')
     955 + continue
     956 + if 'legacy' in o:
     957 + userRefs[userId] = self._user_to_user(o['legacy'], id_ = userId)
     958 + else:
     959 + userRefs[userId] = UserRef(id = userId)
     960 + 
     961 + if apiType is _TwitterAPIType.V2:
     962 + messyBindingValues = card['binding_values'].items()
     963 + elif apiType is _TwitterAPIType.GRAPHQL:
     964 + messyBindingValues = ((x['key'], x['value']) for x in card['legacy']['binding_values'])
     965 + for key, value in messyBindingValues:
     966 + if 'type' not in value:
     967 + # Silently ignore creator/site entries since they frequently appear like this.
     968 + if key not in ('creator', 'site'):
     969 + _logger.warning(f'Skipping type-less card value {key!r} on tweet {tweetId}')
    628 970   continue
    629 971   if value['type'] == 'STRING':
    630  - cardKwargs[kwarg] = value['string_value']
     972 + bindingValues[key] = value['string_value']
     973 + if key.endswith('_datetime_utc'):
     974 + bindingValues[key] = datetime.datetime.strptime(bindingValues[key], '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo = datetime.timezone.utc)
    631 975   elif value['type'] == 'IMAGE':
    632  - cardKwargs[kwarg] = value['image_value']['url']
     976 + bindingValues[key] = value['image_value']['url']
     977 + elif value['type'] == 'IMAGE_COLOR':
     978 + # Silently discard this.
     979 + pass
     980 + elif value['type'] == 'BOOLEAN':
     981 + bindingValues[key] = value['boolean_value']
     982 + elif value['type'] == 'USER':
     983 + bindingValues[key] = userRefs[int(value['user_value']['id_str'])]
     984 + else:
     985 + _logger.warning(f'Unsupported card value type on {key!r} on tweet {tweetId}: {value["type"]!r}')
     986 + 
     987 + if apiType is _TwitterAPIType.V2:
     988 + cardName = card['name']
     989 + elif apiType is _TwitterAPIType.GRAPHQL:
     990 + cardName = card['legacy']['name']
     991 + 
     992 + if cardName in ('summary', 'summary_large_image', 'app', 'direct_store_link_app'):
     993 + keyKwargMap = {
     994 + 'title': 'title',
     995 + 'description': 'description',
     996 + 'card_url': 'url',
     997 + 'site': 'siteUser',
     998 + 'creator': 'creatorUser',
     999 + }
     1000 + if cardName in ('app', 'direct_store_link_app'):
     1001 + keyKwargMap['thumbnail_original'] = 'thumbnailUrl'
     1002 + return AppCard(**_kwargs_from_map(keyKwargMap))
    633 1003   else:
    634  - raise snscrape.base.ScraperError(f'Unknown card value type: {value["type"]!r}')
    635  - return Card(**cardKwargs)
     1004 + keyKwargMap['thumbnail_image_original'] = 'thumbnailUrl'
     1005 + return SummaryCard(**_kwargs_from_map(keyKwargMap))
     1006 + elif any(cardName.startswith(x) for x in ('poll2choice_', 'poll3choice_', 'poll4choice_')) and cardName.split('_', 1)[1] in ('text_only', 'image', 'video'):
     1007 + kwargs = _kwargs_from_map({'end_datetime_utc': 'endDate', 'last_updated_datetime_utc': 'lastUpdateDate', 'duration_minutes': 'duration', 'counts_are_final': 'finalResults'})
     1008 + 
     1009 + options = []
     1010 + for key in sorted(bindingValues):
     1011 + if key.startswith('choice') and key.endswith('_label'):
     1012 + optKwargs = {'label': bindingValues[key]}
     1013 + if (count := bindingValues.get(f'{key[:-5]}count')):
     1014 + optKwargs['count'] = int(count)
     1015 + options.append(PollOption(**optKwargs))
     1016 + kwargs['options'] = options
     1017 + kwargs['duration'] = int(kwargs['duration'])
     1018 + 
     1019 + if cardName.endswith('_image'):
     1020 + kwargs['medium'] = Photo(previewUrl = bindingValues['image_small'], fullUrl = bindingValues['image_original'])
     1021 + elif cardName.endswith('_video'):
     1022 + variants = []
     1023 + variants.append(VideoVariant(contentType = 'application/x-mpegurl', url = bindingValues['player_hls_url'], bitrate = None))
     1024 + if 'vmap' not in bindingValues['player_stream_url']:
     1025 + _logger.warning(f'Non-VMAP URL in {cardName} player_stream_url on tweet {tweetId}')
     1026 + variants.append(VideoVariant(contentType = 'text/xml', url = bindingValues['player_stream_url'], bitrate = None))
     1027 + kwargs['medium'] = Video(thumbnailUrl = bindingValues['player_image_original'], variants = variants, duration = int(bindingValues['content_duration_seconds']))
     1028 + 
     1029 + return PollCard(**kwargs)
     1030 + elif cardName == 'player':
     1031 + return PlayerCard(**_kwargs_from_map({'title': 'title', 'description': 'description', 'card_url': 'url', 'player_image_original': 'imageUrl', 'site': 'siteUser'}))
     1032 + elif cardName in ('promo_image_convo', 'promo_video_convo'):
     1033 + kwargs = _kwargs_from_map({'thank_you_text': 'thankYouText', 'thank_you_url': 'thankYouUrl', 'thank_you_shortened_url': 'thankYouTcoUrl'})
     1034 + kwargs['actions'] = []
     1035 + for l in ('one', 'two', 'three', 'four'):
     1036 + if f'cta_{l}' in bindingValues:
     1037 + kwargs['actions'].append(PromoConvoAction(label = bindingValues[f'cta_{l}'], tweet = bindingValues[f'cta_{l}_tweet']))
     1038 + if 'image' in cardName:
     1039 + kwargs['medium'] = Photo(previewUrl = bindingValues['promo_image_small'], fullUrl = bindingValues['promo_image_original'])
     1040 + if 'cover_promo_image' in bindingValues:
     1041 + kwargs['cover'] = Photo(previewUrl = bindingValues['cover_promo_image_small'], fullUrl = bindingValues['cover_promo_image_original'])
     1042 + elif 'video' in cardName:
     1043 + variants = []
     1044 + variants.append(VideoVariant(contentType = bindingValues['player_stream_content_type'], url = bindingValues['player_stream_url'], bitrate = None))
     1045 + if bindingValues['player_stream_url'] != bindingValues['player_url']:
     1046 + if 'vmap' not in bindingValues['player_url']:
     1047 + _logger.warning(f'Non-VMAP URL in {cardName} player_url on tweet {tweetId}')
     1048 + variants.append(VideoVariant(contentType = 'text/xml', url = bindingValues['player_url'], bitrate = None))
     1049 + kwargs['medium'] = Video(thumbnailUrl = bindingValues['player_image_original'], variants = variants, duration = int(bindingValues['content_duration_seconds']))
     1050 + return PromoConvoCard(**kwargs)
     1051 + elif cardName in ('745291183405076480:broadcast', '3691233323:periscope_broadcast'):
     1052 + keyKwargMap = {'broadcast_state': 'state', 'broadcast_source': 'source', 'site': 'siteUser'}
     1053 + if cardName == '745291183405076480:broadcast':
     1054 + keyKwargMap = {**keyKwargMap, 'broadcast_id': 'id', 'broadcast_url': 'url', 'broadcast_title': 'title', 'broadcast_thumbnail_original': 'thumbnailUrl'}
     1055 + else:
     1056 + keyKwargMap = {**keyKwargMap, 'id': 'id', 'url': 'url', 'title': 'title', 'description': 'description', 'total_participants': 'totalParticipants', 'full_size_thumbnail_url': 'thumbnailUrl'}
     1057 + kwargs = _kwargs_from_map(keyKwargMap)
     1058 + if 'broadcaster_twitter_id' in bindingValues:
     1059 + kwargs['broadcaster'] = User(id = int(bindingValues['broadcaster_twitter_id']), username = bindingValues['broadcaster_username'], displayname = bindingValues['broadcaster_display_name'])
     1060 + if 'siteUser' not in kwargs:
     1061 + kwargs['siteUser'] = None
     1062 + if cardName == '745291183405076480:broadcast':
     1063 + return BroadcastCard(**kwargs)
     1064 + else:
     1065 + kwargs['totalParticipants'] = int(kwargs['totalParticipants'])
     1066 + return PeriscopeBroadcastCard(**kwargs)
     1067 + elif cardName == '745291183405076480:live_event':
     1068 + kwargs = _kwargs_from_map({'event_id': 'id', 'event_title': 'title', 'event_category': 'category', 'event_subtitle': 'description'})
     1069 + kwargs['id'] = int(kwargs['id'])
     1070 + kwargs['photo'] = Photo(previewUrl = bindingValues['event_thumbnail_small'], fullUrl = bindingValues.get('event_thumbnail_original') or bindingValues['event_thumbnail'])
     1071 + return EventCard(event = Event(**kwargs))
     1072 + elif cardName == '3337203208:newsletter_publication':
     1073 + kwargs = _kwargs_from_map({'newsletter_title': 'title', 'newsletter_description': 'description', 'newsletter_image_original': 'imageUrl', 'card_url': 'url', 'revue_account_id': 'revueAccountId', 'issue_count': 'issueCount'})
     1074 + kwargs['revueAccountId'] = int(kwargs['revueAccountId'])
     1075 + kwargs['issueCount'] = int(kwargs['issueCount'])
     1076 + return NewsletterCard(**kwargs)
     1077 + elif cardName == '3337203208:newsletter_issue':
     1078 + kwargs = _kwargs_from_map({
     1079 + 'newsletter_title': 'newsletterTitle',
     1080 + 'newsletter_description': 'newsletterDescription',
     1081 + 'issue_title': 'issueTitle',
     1082 + 'issue_description': 'issueDescription',
     1083 + 'issue_number': 'issueNumber',
     1084 + 'issue_image_original': 'imageUrl',
     1085 + 'card_url': 'url',
     1086 + 'revue_account_id': 'revueAccountId'
     1087 + })
     1088 + kwargs['issueNumber'] = int(kwargs['issueNumber'])
     1089 + kwargs['revueAccountId'] = int(kwargs['revueAccountId'])
     1090 + return NewsletterIssueCard(**kwargs)
     1091 + elif cardName == 'amplify':
     1092 + return AmplifyCard(
     1093 + id = bindingValues['amplify_content_id'],
     1094 + video = Video(
     1095 + thumbnailUrl = bindingValues['player_image'],
     1096 + variants = [VideoVariant(contentType = bindingValues['player_stream_content_type'], url = bindingValues['amplify_url_vmap'], bitrate = None)],
     1097 + ),
     1098 + )
     1099 + elif cardName == 'appplayer':
     1100 + kwargs = _kwargs_from_map({'title': 'title', 'app_category': 'appCategory', 'player_owner_id': 'playerOwnerId', 'site': 'siteUser'})
     1101 + kwargs['playerOwnerId'] = int(kwargs['playerOwnerId'])
     1102 + variants = []
     1103 + variants.append(VideoVariant(contentType = 'application/x-mpegurl', url = bindingValues['player_hls_url'], bitrate = None))
     1104 + if 'vmap' not in bindingValues['player_url']:
     1105 + _logger.warning(f'Non-VMAP URL in {cardName} player_url on tweet {tweetId}')
     1106 + variants.append(VideoVariant(contentType = 'text/xml', url = bindingValues['player_url'], bitrate = None))
     1107 + kwargs['video'] = Video(thumbnailUrl = bindingValues['player_image_original'], variants = variants, duration = int(bindingValues['content_duration_seconds']))
     1108 + return AppPlayerCard(**kwargs)
     1109 + elif cardName == '3691233323:audiospace':
     1110 + return SpacesCard(**_kwargs_from_map({'card_url': 'url', 'id': 'id'}))
     1111 + elif cardName == '2586390716:message_me':
     1112 + # Note that the strings in Twitter's JS appear to have an incorrect mapping that then gets changed somewhere in the 1.8 MiB of JS!
     1113 + # cta_1, 3, and 4 should mean 'Message us', 'Send a private message', and 'Send me a private message', but the correct mapping is currently unknown.
     1114 + ctas = {'message_me_card_cta_2': 'Send us a private message'}
     1115 + if bindingValues['cta'] not in ctas:
     1116 + _logger.warning(f'Unsupported message_me card cta on tweet {tweetId}: {bindingValues["cta"]!r}')
     1117 + return
     1118 + return MessageMeCard(**_kwargs_from_map({'recipient': 'recipient', 'card_url': 'url'}), buttonText = ctas[bindingValues['cta']])
     1119 + elif cardName == 'unified_card':
     1120 + o = json.loads(bindingValues['unified_card'])
     1121 + kwargs = {}
     1122 + if 'type' in o:
     1123 + unifiedCardType = o.get('type')
     1124 + if unifiedCardType not in (
     1125 + 'image_app',
     1126 + 'image_carousel_app',
     1127 + 'image_carousel_website',
     1128 + 'image_multi_dest_carousel_website',
     1129 + 'image_website',
     1130 + 'mixed_media_multi_dest_carousel_website',
     1131 + 'mixed_media_single_dest_carousel_app',
     1132 + 'mixed_media_single_dest_carousel_website',
     1133 + 'video_app',
     1134 + 'video_carousel_app',
     1135 + 'video_carousel_website',
     1136 + 'video_multi_dest_carousel_website',
     1137 + 'video_website',
     1138 + ):
     1139 + _logger.warning(f'Unsupported unified_card type on tweet {tweetId}: {unifiedCardType!r}')
     1140 + return
     1141 + kwargs['type'] = unifiedCardType
     1142 + elif set(c['type'] for c in o['component_objects'].values()) not in ({'media', 'twitter_list_details'}, {'media', 'community_details'}):
     1143 + _logger.warning(f'Unsupported unified_card type on tweet {tweetId}')
     1144 + return
     1145 + 
     1146 + kwargs['componentObjects'] = {}
     1147 + for k, v in o['component_objects'].items():
     1148 + if v['type'] == 'details':
     1149 + co = UnifiedCardDetailComponentObject(content = v['data']['title']['content'], destinationKey = v['data']['destination'])
     1150 + elif v['type'] == 'media':
     1151 + co = UnifiedCardMediumComponentObject(mediumKey = v['data']['id'], destinationKey = v['data']['destination'])
     1152 + elif v['type'] == 'button_group':
     1153 + if not all(b['type'] == 'cta' for b in v['data']['buttons']):
     1154 + _logger.warning(f'Unsupported unified_card button_group button type on tweet {tweetId}')
     1155 + return
     1156 + buttons = [UnifiedCardButton(text = b['action'][0].upper() + re.sub('[A-Z]', lambda x: f' {x[0]}', b['action'][1:]), destinationKey = b['destination']) for b in v['data']['buttons']]
     1157 + co = UnifiedCardButtonGroupComponentObject(buttons = buttons)
     1158 + elif v['type'] == 'swipeable_media':
     1159 + media = [UnifiedCardSwipeableMediaMedium(mediumKey = m['id'], destinationKey = m['destination']) for m in v['data']['media_list']]
     1160 + co = UnifiedCardSwipeableMediaComponentObject(media = media)
     1161 + elif v['type'] == 'app_store_details':
     1162 + co = UnifiedCardAppStoreComponentObject(appKey = v['data']['app_id'], destinationKey = v['data']['destination'])
     1163 + elif v['type'] == 'twitter_list_details':
     1164 + co = UnifiedCardTwitterListDetailsComponentObject(
     1165 + name = v['data']['name']['content'],
     1166 + memberCount = v['data']['member_count'],
     1167 + subscriberCount = v['data']['subscriber_count'],
     1168 + user = self._user_to_user(o['users'][v['data']['user_id']]),
     1169 + destinationKey = v['data']['destination'],
     1170 + )
     1171 + elif v['type'] == 'community_details':
     1172 + co = UnifiedCardTwitterCommunityDetailsComponentObject(
     1173 + name = v['data']['name']['content'],
     1174 + theme = v['data']['theme'],
     1175 + membersCount = v['data']['member_count'],
     1176 + destinationKey = v['data']['destination'],
     1177 + membersFacepile = [self._user_to_user(u) for u in map(o['users'].get, v['data']['members_facepile']) if u],
     1178 + )
     1179 + else:
     1180 + _logger.warning(f'Unsupported unified_card component type on tweet {tweetId}: {v["type"]!r}')
     1181 + return
     1182 + kwargs['componentObjects'][k] = co
     1183 + 
     1184 + kwargs['destinations'] = {}
     1185 + for k, v in o['destination_objects'].items():
     1186 + dKwargs = {}
     1187 + if 'url_data' in v['data']:
     1188 + dKwargs['url'] = v['data']['url_data']['url']
     1189 + if 'app_id' in v['data']:
     1190 + dKwargs['appKey'] = v['data']['app_id']
     1191 + if 'media_id' in v['data']:
     1192 + dKwargs['mediumKey'] = v['data']['media_id']
     1193 + kwargs['destinations'][k] = UnifiedCardDestination(**dKwargs)
     1194 + 
     1195 + kwargs['media'] = {}
     1196 + for k, v in o['media_entities'].items():
     1197 + if (medium := self._make_medium(v, tweetId)):
     1198 + kwargs['media'][k] = medium
     1199 + 
     1200 + if 'app_store_data' in o:
     1201 + kwargs['apps'] = {}
     1202 + for k, v in o['app_store_data'].items():
     1203 + variants = []
     1204 + for var in v:
     1205 + vKwargsMap = {
     1206 + 'type': 'type',
     1207 + 'id': 'id',
     1208 + 'icon_media_key': 'iconMediumKey',
     1209 + 'country_code': 'countryCode',
     1210 + 'num_installs': 'installs',
     1211 + 'size_bytes': 'size',
     1212 + 'is_free': 'isFree',
     1213 + 'is_editors_choice': 'isEditorsChoice',
     1214 + 'has_in_app_purchases': 'hasInAppPurchases',
     1215 + 'has_in_app_ads': 'hasInAppAds',
     1216 + }
     1217 + vKwargs = {kwarg: var[key] for key, kwarg in vKwargsMap.items() if key in var}
     1218 + vKwargs['title'] = var['title']['content']
     1219 + if 'description' in var:
     1220 + vKwargs['description'] = var['description']['content']
     1221 + vKwargs['category'] = var['category']['content']
     1222 + if (ratings := var['ratings']):
     1223 + vKwargs['ratingAverage'] = var['ratings']['star']
     1224 + vKwargs['ratingCount'] = var['ratings']['count']
     1225 + vKwargs['url'] = f'https://play.google.com/store/apps/details?id={var["id"]}' if var['type'] == 'android_app' else f'https://itunes.apple.com/app/id{var["id"]}'
     1226 + variants.append(UnifiedCardApp(**vKwargs))
     1227 + kwargs['apps'][k] = variants
     1228 + 
     1229 + if o['components']:
     1230 + kwargs['components'] = o['components']
     1231 + 
     1232 + if 'layout' in o:
     1233 + if o['layout']['type'] != 'swipeable':
     1234 + _logger.warning(f'Unsupported unified_card layout type on tweet {tweetId}: {o["layout"]["type"]!r}')
     1235 + return
     1236 + kwargs['swipeableLayoutSlides'] = [UnifiedCardSwipeableLayoutSlide(mediumComponentKey = v[0], componentKey = v[1]) for v in o['layout']['data']['slides']]
     1237 + 
     1238 + return UnifiedCard(**kwargs)
     1239 + 
     1240 + _logger.warning(f'Unsupported card type on tweet {tweetId}: {cardName!r}')
    636 1241   
    637 1242   def _tweet_to_tweet(self, tweet, obj):
    638 1243   user = self._user_to_user(obj['globalObjects']['users'][tweet['user_id_str']])
    skipped 3 lines
    642 1247   if 'quoted_status_id_str' in tweet and tweet['quoted_status_id_str'] in obj['globalObjects']['tweets']:
    643 1248   kwargs['quotedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['quoted_status_id_str']], obj)
    644 1249   if 'card' in tweet:
    645  - kwargs['card'] = self._make_card(tweet['card'], _TwitterAPIType.V2)
     1250 + kwargs['card'] = self._make_card(tweet['card'], _TwitterAPIType.V2, self._get_tweet_id(tweet))
    646 1251   return self._make_tweet(tweet, user, **kwargs)
    647 1252   
    648 1253   def _graphql_timeline_tweet_item_result_to_tweet(self, result):
    skipped 23 lines
    672 1277   elif 'quoted_status_id_str' in tweet:
    673 1278   kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
    674 1279   if 'card' in result:
    675  - kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL)
     1280 + kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL, self._get_tweet_id(tweet))
    676 1281   return self._make_tweet(tweet, user, **kwargs)
    677 1282   
    678 1283   def _graphql_timeline_instructions_to_tweets(self, instructions, includeConversationThreads = False):
    skipped 435 lines
Please wait...
Page is in error, reload to recover