Projects STRLCPY snscrape Commits 4e59638e
🤬
  • Added a forwardedUrl attribute to TelegramPost and changed the type of the forwarded attribute to Channel.

  • Loading...
  • Tristan Lee committed 2 years ago
    4e59638e
    1 parent a7eb54d2
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    snscrape/modules/telegram.py
    skipped 24 lines
    25 25   
    26 26   
    27 27  @dataclasses.dataclass
    28  -class TelegramPost(snscrape.base.Item):
    29  - url: str
    30  - date: datetime.datetime
    31  - content: str
    32  - outlinks: list
    33  - media: typing.Optional[typing.List['Medium']]
    34  - forwarded: str
    35  - views: int = None
    36  - linkPreview: typing.Optional[LinkPreview] = None
    37  - 
    38  - outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks), 'outlinks')
    39  - 
    40  - def __str__(self):
    41  - return self.url
    42  - 
    43  - 
    44  -@dataclasses.dataclass
    45 28  class Channel(snscrape.base.Entity):
    46 29   username: str
    47 30   title: str
    skipped 14 lines
    62 45   def __str__(self):
    63 46   return f'https://t.me/s/{self.username}'
    64 47   
     48 +@dataclasses.dataclass
     49 +class TelegramPost(snscrape.base.Item):
     50 + url: str
     51 + date: datetime.datetime
     52 + content: str
     53 + outlinks: list
     54 + forwarded: typing.Optional['Channel'] = None
     55 + forwardedUrl: typing.Optional[str] = None
     56 + media: typing.Optional[typing.List['Medium']] = None
     57 + views: typing.Optional[int] = None
     58 + linkPreview: typing.Optional[LinkPreview] = None
     59 + 
     60 + outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks), 'outlinks')
     61 + 
     62 + def __str__(self):
     63 + return self.url
     64 + 
    65 65  class Medium:
    66 66   pass
    67 67   
    skipped 50 lines
    118 118   date = datetime.datetime.strptime(dateDiv.find('time', datetime = True)['datetime'].replace('-', '', 2).replace(':', ''), '%Y%m%dT%H%M%S%z')
    119 119   media = []
    120 120   forwarded = None
     121 + forwardedUrl = None
    121 122   if (message := post.find('div', class_ = 'tgme_widget_message_text')):
    122 123   content = message.get_text(separator="\n")
    123 124   
    skipped 19 lines
    143 144   mKwargs['duration'] = sum([int(s) * int(g) for s, g in zip([1, 60, 360], reversed(durationStr))])
    144 145   media.append(cls(**mKwargs))
    145 146   if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
    146  - forwarded = forward_tag['href'].split('t.me/')[1].split('/')[0]
     147 + forwardedUrl = forward_tag['href']
     148 + forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0]
     149 + forwardedChannelScraper = TelegramChannelScraper(name = forwardedName)
     150 + forwarded = forwardedChannelScraper._get_entity()
     151 + 
    147 152   outlinks = []
    148 153   for link in post.find_all('a'):
    149 154   if any(x in link.parent.attrs.get('class', []) for x in ('tgme_widget_message_user', 'tgme_widget_message_author')):
    skipped 45 lines
    195 200   else:
    196 201   views = parse_num(viewsSpan.text)
    197 202  
    198  - yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, media = media, forwarded = forwarded, views = views)
     203 + yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, media = media, forwarded = forwarded, forwardedUrl = forwardedUrl, views = views)
    199 204   
    200 205   def get_items(self):
    201 206   r, soup = self._initial_page()
    skipped 76 lines
Please wait...
Page is in error, reload to recover