■ ■ ■ ■ ■ ■
snscrape/modules/telegram.py
| skipped 55 lines |
56 | 56 | | forwarded: typing.Optional['Channel'] = None |
57 | 57 | | forwardedUrl: typing.Optional[str] = None |
58 | 58 | | media: typing.Optional[typing.List['Medium']] = None |
59 | | - | views: typing.Optional[int] = None |
| 59 | + | views: typing.Optional[snscrape.base.IntWithGranularity] = None |
60 | 60 | | linkPreview: typing.Optional[LinkPreview] = None |
61 | 61 | | |
62 | 62 | | outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks), 'outlinks') |
| skipped 112 lines |
175 | 175 | | for voicePlayer in post.find_all('a', {'class': 'tgme_widget_message_voice_player'}): |
176 | 176 | | audioUrl = voicePlayer.find('audio')['src'] |
177 | 177 | | durationStr = voicePlayer.find('time').text |
178 | | - | duration = durationStrToSeconds(durationStr) |
| 178 | + | duration = _durationStrToSeconds(durationStr) |
179 | 179 | | barHeights = [float(s['style'].split(':')[-1].strip(';%')) for s in voicePlayer.find('div', {'class': 'bar'}).find_all('s')] |
180 | 180 | | |
181 | 181 | | media.append(VoiceMessage(url = audioUrl, duration = duration, bars = barHeights)) |
| skipped 18 lines |
200 | 200 | | else: |
201 | 201 | | cls = Video |
202 | 202 | | durationStr = videoPlayer.find('time').text |
203 | | - | mKwargs['duration'] = durationStrToSeconds(durationStr) |
| 203 | + | mKwargs['duration'] = _durationStrToSeconds(durationStr) |
204 | 204 | | media.append(cls(**mKwargs)) |
205 | 205 | | |
206 | 206 | | linkPreview = None |
| skipped 16 lines |
223 | 223 | | outlinks.remove(kwargs['href']) |
224 | 224 | | |
225 | 225 | | viewsSpan = post.find('span', class_ = 'tgme_widget_message_views') |
226 | | - | views = None if viewsSpan is None else parse_num(viewsSpan.text) |
| 226 | + | views = None if viewsSpan is None else _parse_num(viewsSpan.text) |
| 227 | + | |
| 228 | + | outlinks = outlinks if outlinks else None |
| 229 | + | media = media if media else None |
| 230 | + | mentions = mentions if mentions else None |
| 231 | + | hashtags = hashtags if hashtags else None |
227 | 232 | | |
228 | 233 | | yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, mentions = mentions, hashtags = hashtags, linkPreview = linkPreview, media = media, forwarded = forwarded, forwardedUrl = forwardedUrl, views = views) |
229 | 234 | | |
| skipped 22 lines |
252 | 257 | | else: |
253 | 258 | | break |
254 | 259 | | nextPageUrl = urllib.parse.urljoin(r.url, pageLink['href']) |
255 | | - | r = self._get(nextPageUrl, headers = self._headers, responseOkCallback = telegramResponseOkCallback) |
| 260 | + | r = self._get(nextPageUrl, headers = self._headers, responseOkCallback = _telegramResponseOkCallback) |
256 | 261 | | if r.status_code != 200: |
257 | 262 | | raise snscrape.base.ScraperException(f'Got status code {r.status_code}') |
258 | 263 | | soup = bs4.BeautifulSoup(r.text, 'lxml') |
| skipped 34 lines |
293 | 298 | | kwargs['description'] = descriptionDiv.text |
294 | 299 | | |
295 | 300 | | for div in channelInfoDiv.find_all('div', class_ = 'tgme_channel_info_counter'): |
296 | | - | value, granularity = parse_num(div.find('span', class_ = 'counter_value').text) |
| 301 | + | value, granularity = _parse_num(div.find('span', class_ = 'counter_value').text) |
297 | 302 | | type_ = div.find('span', class_ = 'counter_type').text |
298 | 303 | | if type_ == 'members': |
299 | 304 | | # Already extracted more accurately from /channel, skip |
| skipped 11 lines |
311 | 316 | | def _cli_from_args(cls, args): |
312 | 317 | | return cls._cli_construct(args, args.channel) |
313 | 318 | | |
314 | | - | def parse_num(s): |
| 319 | + | def _parse_num(s): |
315 | 320 | | s = s.replace(' ', '') |
316 | 321 | | if s.endswith('M'): |
317 | 322 | | return int(float(s[:-1]) * 1e6), 10 ** (6 if '.' not in s else 6 - len(s[:-1].split('.')[1])) |
| skipped 1 lines |
319 | 324 | | return int(float(s[:-1]) * 1000), 10 ** (3 if '.' not in s else 3 - len(s[:-1].split('.')[1])) |
320 | 325 | | return int(s), 1 |
321 | 326 | | |
def _durationStrToSeconds(durationStr):
    """Convert a colon-separated duration string into a total number of seconds.

    The fields are read from right to left as seconds, minutes, and hours, so
    '45', '1:30', and '1:02:03' are all accepted. Only three unit sizes are
    defined, so any field further left than the hours field is ignored.

    Raises ValueError if a field cannot be parsed by int().
    """
    unitSizes = (1, 60, 3600)
    total = 0
    # Walk the fields from least to most significant, pairing each with its unit size.
    for unitSize, field in zip(unitSizes, reversed(durationStr.split(':'))):
        total += unitSize * int(field)
    return total
325 | 330 | | |
def _telegramResponseOkCallback(r):
    """Decide whether a response is acceptable, based only on its status code.

    Returns a two-element tuple: (True, None) when r.status_code is 200,
    otherwise (False, message), where message names the status code that was
    received.
    """
    isOk = r.status_code == 200
    # The '=' specifier makes the message read 'r.status_code=<value>'.
    return (True, None) if isOk else (False, f'{r.status_code=}')
| skipped 1 lines |