■ ■ ■ ■ ■ ■
snscrape/modules/telegram.py
| skipped 26 lines |
27 | 27 | | @dataclasses.dataclass |
28 | 28 | | class Channel(snscrape.base.Entity): |
29 | 29 | | username: str |
30 | | - | title: str |
31 | | - | verified: bool |
32 | | - | photo: str |
| 30 | + | title: typing.Optional[str] = None |
| 31 | + | verified: typing.Optional[bool] = None |
| 32 | + | photo: typing.Optional[str] = None |
33 | 33 | | description: typing.Optional[str] = None |
34 | 34 | | members: typing.Optional[int] = None |
35 | 35 | | photos: typing.Optional[snscrape.base.IntWithGranularity] = None |
| skipped 87 lines |
123 | 123 | | content = message.get_text(separator="\n") |
124 | 124 | | |
125 | 125 | | for video_player in post.find_all('a', {'class': 'tgme_widget_message_video_player'}): |
126 | | - | |
127 | | - | style = video_player.find('i')['style'] |
128 | | - | videoThumbnailUrl = re.findall('url\(\'(.*?)\'\)', style) |
129 | | - | videoTag = video_player.find('video') |
130 | | - | if videoTag is None: |
| 126 | + | iTag = video_player.find('i') |
| 127 | + | if iTag is None: |
131 | 128 | | videoUrl = None |
| 129 | + | videoThumbnailUrl = None |
132 | 130 | | else: |
133 | | - | videoUrl = videoTag['src'] |
| 131 | + | style = iTag['style'] |
| 132 | + | videoThumbnailUrl = re.findall('url\(\'(.*?)\'\)', style)[0] |
| 133 | + | videoTag = video_player.find('video') |
| 134 | + | if videoTag is None: |
| 135 | + | videoUrl = None |
| 136 | + | else: |
| 137 | + | videoUrl = videoTag['src'] |
134 | 138 | | mKwargs = { |
135 | 139 | | 'thumbnailUrl': videoThumbnailUrl, |
136 | 140 | | 'url': videoUrl, |
| skipped 9 lines |
146 | 150 | | if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')): |
147 | 151 | | forwardedUrl = forward_tag['href'] |
148 | 152 | | forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0] |
149 | | - | forwardedChannelScraper = TelegramChannelScraper(name = forwardedName) |
150 | | - | forwarded = forwardedChannelScraper._get_entity() |
| 153 | + | forwarded = Channel(username = forwardedName) |
151 | 154 | | |
152 | 155 | | outlinks = [] |
153 | 156 | | for link in post.find_all('a'): |
| skipped 59 lines |
213 | 216 | | if not pageLink: |
214 | 217 | | break |
215 | 218 | | nextPageUrl = urllib.parse.urljoin(r.url, pageLink['href']) |
216 | | - | r = self._get(nextPageUrl, headers = self._headers) |
| 219 | + | r = self._get(nextPageUrl, headers = self._headers, responseOkCallback = telegramResponseOkCallback) |
217 | 220 | | if r.status_code != 200: |
218 | 221 | | raise snscrape.base.ScraperException(f'Got status code {r.status_code}') |
219 | 222 | | soup = bs4.BeautifulSoup(r.text, 'lxml') |
| skipped 60 lines |
280 | 283 | | return int(float(s[:-1]) * 1000), 10 ** (3 if '.' not in s else 3 - len(s[:-1].split('.')[1])) |
281 | 284 | | else: |
282 | 285 | | return int(s), 1 |
| 286 | + | |
def telegramResponseOkCallback(r):
    """Classify a Telegram HTTP response as successful or failed.

    This function is handed to the scraper's ``_get`` call as its
    ``responseOkCallback`` argument.

    Args:
        r: Response-like object exposing an integer ``status_code``.

    Returns:
        A ``(success, message)`` tuple. ``success`` is True only for
        HTTP 200, in which case ``message`` is None. For any other status,
        ``success`` is False and ``message`` is ``'status code: <code>'``.
    """
    if r.status_code == 200:
        return (True, None)
    # Report the code for every failure, not just 5xx: previously 4xx and
    # other non-200 responses came back with a None message, so the reason
    # for the failure was lost.
    # NOTE(review): presumably the message ends up in the request error
    # log of the base scraper -- confirm against snscrape.base.
    return (False, f'status code: {r.status_code}')