Projects STRLCPY snscrape Commits fdc40f74
🤬
  • ■ ■ ■ ■ ■ ■
    snscrape/modules/vkontakte.py
    skipped 38 lines
    39 39   
    40 40   
    41 41  @dataclasses.dataclass
     42 +class User(snscrape.base.Entity):
     43 + username: str
     44 + name: str
     45 + verified: bool
     46 + description: typing.Optional[str] = None
     47 + websites: typing.Optional[typing.List[str]] = None
     48 + followers: typing.Optional[snscrape.base.IntWithGranularity] = None
     49 + posts: typing.Optional[snscrape.base.IntWithGranularity] = None
     50 + photos: typing.Optional[snscrape.base.IntWithGranularity] = None
     51 + tags: typing.Optional[snscrape.base.IntWithGranularity] = None
     52 + following: typing.Optional[snscrape.base.IntWithGranularity] = None
     53 + 
     54 + followersGranularity = snscrape.base._DeprecatedProperty('followersGranularity', lambda self: self.followers.granularity, 'followers.granularity')
     55 + postsGranularity = snscrape.base._DeprecatedProperty('postsGranularity', lambda self: self.posts.granularity, 'posts.granularity')
     56 + photosGranularity = snscrape.base._DeprecatedProperty('photosGranularity', lambda self: self.photos.granularity, 'photos.granularity')
     57 + tagsGranularity = snscrape.base._DeprecatedProperty('tagsGranularity', lambda self: self.tags.granularity, 'tags.granularity')
     58 + followingGranularity = snscrape.base._DeprecatedProperty('followingGranularity', lambda self: self.following.granularity, 'following.granularity')
     59 + 
     60 + def __str__(self):
     61 + return f'https://vk.com/{self.username}'
     62 + 
     63 + 
     64 +@dataclasses.dataclass
    42 65  class VKontaktePost(snscrape.base.Item):
    43 66   url: str
    44 67   date: typing.Optional[typing.Union[datetime.datetime, datetime.date]]
    45 68   content: str
     69 + user: User
    46 70   outlinks: typing.Optional[typing.List[str]] = None
    47 71   photos: typing.Optional[typing.List['Photo']] = None
    48 72   video: typing.Optional['Video'] = None
    skipped 25 lines
    74 98   thumbUrl: str
    75 99   
    76 100   
    77  -@dataclasses.dataclass
    78  -class User(snscrape.base.Entity):
    79  - username: str
    80  - name: str
    81  - verified: bool
    82  - description: typing.Optional[str] = None
    83  - websites: typing.Optional[typing.List[str]] = None
    84  - followers: typing.Optional[snscrape.base.IntWithGranularity] = None
    85  - posts: typing.Optional[snscrape.base.IntWithGranularity] = None
    86  - photos: typing.Optional[snscrape.base.IntWithGranularity] = None
    87  - tags: typing.Optional[snscrape.base.IntWithGranularity] = None
    88  - following: typing.Optional[snscrape.base.IntWithGranularity] = None
    89  - 
    90  - followersGranularity = snscrape.base._DeprecatedProperty('followersGranularity', lambda self: self.followers.granularity, 'followers.granularity')
    91  - postsGranularity = snscrape.base._DeprecatedProperty('postsGranularity', lambda self: self.posts.granularity, 'posts.granularity')
    92  - photosGranularity = snscrape.base._DeprecatedProperty('photosGranularity', lambda self: self.photos.granularity, 'photos.granularity')
    93  - tagsGranularity = snscrape.base._DeprecatedProperty('tagsGranularity', lambda self: self.tags.granularity, 'tags.granularity')
    94  - followingGranularity = snscrape.base._DeprecatedProperty('followingGranularity', lambda self: self.following.granularity, 'following.granularity')
    95  - 
    96  - def __str__(self):
    97  - return f'https://vk.com/{self.username}'
    98  - 
    99  - 
    100 101  class VKontakteUserScraper(snscrape.base.Scraper):
    101 102   name = 'vkontakte-user'
    102 103   
    skipped 113 lines
    216 217   photoUrl = f'https://vk.com{a["href"]}' if 'href' in a.attrs and a['href'].startswith('/photo') and a['href'][6:].strip('0123456789-_') == '' else None
    217 218   photos.append(Photo(variants = photoVariants, url = photoUrl))
    218 219   quotedPost = self._post_div_to_item(quoteDiv, isCopy = True) if (quoteDiv := post.find('div', class_ = 'copy_quote')) else None
     220 + authorHeading = post.find('h5', class_ = ['post_author', 'copy_post_author'])
     221 + authorLink = authorHeading.find('a', class_ = ['author', 'copy_author'])
     222 + username = authorLink['href'].split('/')[-1]
     223 + name = authorLink.text
     224 + if authorHeading.find('div', class_ = 'page_verified') is not None:
     225 + verified = True
     226 + else:
     227 + verified = False
     228 + user = User(username = username, name = name, verified = verified)
    219 229   return VKontaktePost(
    220  - url = url,
    221  - date = self._date_span_to_date(dateSpan),
    222  - content = textDiv.text if textDiv else None,
    223  - outlinks = outlinks or None,
    224  - photos = photos or None,
    225  - video = video or None,
    226  - quotedPost = quotedPost,
     230 + url = url,
     231 + date = self._date_span_to_date(dateSpan),
     232 + content = textDiv.text if textDiv else None,
     233 + user = user,
     234 + outlinks = outlinks or None,
     235 + photos = photos or None,
     236 + video = video or None,
     237 + quotedPost = quotedPost,
    227 238   )
    228 239   
    229 240   def _soup_to_items(self, soup):
    skipped 149 lines
    379 390   # On public pages, this is where followers are listed
    380 391   if (followersDiv := soup.find('div', id = 'public_followers')):
    381 392   if (topDiv := followersDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Followers':
     393 + kwargs['followers'] = snscrape.base.IntWithGranularity(*parse_num(topDiv.find('span', class_ = 'header_count').text))
     394 + # On community groups, this is where followers are listed
     395 + elif (followersDiv := soup.find('div', class_ = 'group_friends_text')):
     396 + kwargs['followers'] = snscrape.base.IntWithGranularity(*parse_num(followersDiv.find('span', class_ = 'group_friends_count').text))
     397 + # On public groups, this is where followers are listed
     398 + elif (followersDiv := soup.find('div', id = 'group_followers')):
     399 + if (topDiv := followersDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Members':
    382 400   kwargs['followers'] = snscrape.base.IntWithGranularity(*parse_num(topDiv.find('span', class_ = 'header_count').text))
    383 401   
    384 402   return User(**kwargs)
    skipped 9 lines
Please wait...
Page is in error, reload to recover