■ ■ ■ ■ ■ ■
snscrape/modules/telegram.py
| skipped 24 lines |
25 | 25 | | |
26 | 26 | | |
27 | 27 | | @dataclasses.dataclass |
28 | | - | class TelegramPost(snscrape.base.Item): |
29 | | - | url: str |
30 | | - | date: datetime.datetime |
31 | | - | content: str |
32 | | - | outlinks: list |
33 | | - | media: typing.Optional[typing.List['Medium']] |
34 | | - | forwarded: str |
35 | | - | views: int = None |
36 | | - | linkPreview: typing.Optional[LinkPreview] = None |
37 | | - | |
38 | | - | outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks), 'outlinks') |
39 | | - | |
40 | | - | def __str__(self): |
41 | | - | return self.url |
42 | | - | |
43 | | - | |
44 | | - | @dataclasses.dataclass |
45 | 28 | | class Channel(snscrape.base.Entity): |
46 | 29 | | username: str |
47 | 30 | | title: str |
| skipped 14 lines |
62 | 45 | | def __str__(self): |
63 | 46 | | return f'https://t.me/s/{self.username}' |
64 | 47 | | |
| 48 | + | @dataclasses.dataclass |
| 49 | + | class TelegramPost(snscrape.base.Item): |  # One scraped Telegram post; the scraper yields instances built from the keyword arguments url, date, content, outlinks, linkPreview, media, forwarded, forwardedUrl and views.
| 50 | + | url: str |  # URL of the post; this is also what __str__ returns.
| 51 | + | date: datetime.datetime |  # Parsed from the post's <time datetime=...> attribute using a format that includes %z, so the value carries a UTC offset.
| 52 | + | content: str |  # Text of the message div, extracted with "\n" as separator. NOTE(review): the value for posts without a text div is not visible in this view - confirm whether it can be None.
| 53 | + | outlinks: list |  # Links collected from the post's <a> elements; elements must be str for the ' '.join in outlinksss to work.
| 54 | + | forwarded: typing.Optional['Channel'] = None |  # For forwarded posts: the result of TelegramChannelScraper(name = <source channel>)._get_entity(); None otherwise.
| 55 | + | forwardedUrl: typing.Optional[str] = None |  # href of the "forwarded from" link when present; None otherwise.
| 56 | + | media: typing.Optional[typing.List['Medium']] = None |  # Media objects found in the post; the scraper starts from an empty list and appends to it, so it passes [] rather than None when nothing is found.
| 57 | + | views: typing.Optional[int] = None |  # View count; one visible branch sets it via parse_num(viewsSpan.text). NOTE(review): the other branch is outside this view.
| 58 | + | linkPreview: typing.Optional[LinkPreview] = None |  # Presumably the link preview attached to the post, if any - its construction is outside this view, TODO confirm.
| 59 | + | |
| 60 | + | outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks), 'outlinks') |  # Legacy accessor yielding the outlinks joined by single spaces; presumably flagged as deprecated in favour of 'outlinks' - confirm against snscrape.base._DeprecatedProperty.
| 61 | + | |
| 62 | + | def __str__(self): |  # String form of a post is its URL.
| 63 | + | return self.url |
| 64 | + | |
65 | 65 | | class Medium: |  # Empty class (body is only `pass`) used as the element type in TelegramPost.media's annotation; presumably the concrete media classes derive from it - not visible in this view, TODO confirm.
66 | 66 | | pass |
67 | 67 | | |
| skipped 50 lines |
118 | 118 | | date = datetime.datetime.strptime(dateDiv.find('time', datetime = True)['datetime'].replace('-', '', 2).replace(':', ''), '%Y%m%dT%H%M%S%z') |
119 | 119 | | media = [] |
120 | 120 | | forwarded = None |
| 121 | + | forwardedUrl = None |
121 | 122 | | if (message := post.find('div', class_ = 'tgme_widget_message_text')): |
122 | 123 | | content = message.get_text(separator="\n") |
123 | 124 | | |
| skipped 19 lines |
143 | 144 | | mKwargs['duration'] = sum([int(s) * int(g) for s, g in zip([1, 60, 360], reversed(durationStr))]) |
144 | 145 | | media.append(cls(**mKwargs)) |
145 | 146 | | if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')): |
146 | | - | forwarded = forward_tag['href'].split('t.me/')[1].split('/')[0] |
| 147 | + | forwardedUrl = forward_tag['href'] |
| 148 | + | forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0] |
| 149 | + | forwardedChannelScraper = TelegramChannelScraper(name = forwardedName) |
| 150 | + | forwarded = forwardedChannelScraper._get_entity() |
| 151 | + | |
147 | 152 | | outlinks = [] |
148 | 153 | | for link in post.find_all('a'): |
149 | 154 | | if any(x in link.parent.attrs.get('class', []) for x in ('tgme_widget_message_user', 'tgme_widget_message_author')): |
| skipped 45 lines |
195 | 200 | | else: |
196 | 201 | | views = parse_num(viewsSpan.text) |
197 | 202 | | |
198 | | - | yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, media = media, forwarded = forwarded, views = views) |
| 203 | + | yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, media = media, forwarded = forwarded, forwardedUrl = forwardedUrl, views = views) |
199 | 204 | | |
200 | 205 | | def get_items(self): |
201 | 206 | | r, soup = self._initial_page() |
| skipped 76 lines |