■ ■ ■ ■ ■ ■
snscrape/modules/vkontakte.py
skipped 38 lines 39 39 40 40 41 41 @dataclasses.dataclass 42 + class User(snscrape.base.Entity): 43 + username: str 44 + name: str 45 + verified: bool 46 + description: typing.Optional[str] = None 47 + websites: typing.Optional[typing.List[str]] = None 48 + followers: typing.Optional[snscrape.base.IntWithGranularity] = None 49 + posts: typing.Optional[snscrape.base.IntWithGranularity] = None 50 + photos: typing.Optional[snscrape.base.IntWithGranularity] = None 51 + tags: typing.Optional[snscrape.base.IntWithGranularity] = None 52 + following: typing.Optional[snscrape.base.IntWithGranularity] = None 53 + 54 + followersGranularity = snscrape.base._DeprecatedProperty('followersGranularity', lambda self: self.followers.granularity, 'followers.granularity') 55 + postsGranularity = snscrape.base._DeprecatedProperty('postsGranularity', lambda self: self.posts.granularity, 'posts.granularity') 56 + photosGranularity = snscrape.base._DeprecatedProperty('photosGranularity', lambda self: self.photos.granularity, 'photos.granularity') 57 + tagsGranularity = snscrape.base._DeprecatedProperty('tagsGranularity', lambda self: self.tags.granularity, 'tags.granularity') 58 + followingGranularity = snscrape.base._DeprecatedProperty('followingGranularity', lambda self: self.following.granularity, 'following.granularity') 59 + 60 + def __str__(self): 61 + return f'https://vk.com/{self.username}' 62 + 63 + 64 + @dataclasses.dataclass 42 65 class VKontaktePost(snscrape.base.Item): 43 66 url: str 44 67 date: typing.Optional[typing.Union[datetime.datetime, datetime.date]] 45 68 content: str 69 + user: User 46 70 outlinks: typing.Optional[typing.List[str]] = None 47 71 photos: typing.Optional[typing.List['Photo']] = None 48 72 video: typing.Optional['Video'] = None skipped 25 lines 74 98 thumbUrl: str 75 99 76 100 77 - @dataclasses.dataclass 78 - class User(snscrape.base.Entity): 79 - username: str 80 - name: str 81 - verified: bool 82 - description: typing.Optional[str] = None 83 - websites: typing.Optional[typing.List[str]] = None 84 - followers: typing.Optional[snscrape.base.IntWithGranularity] = None 85 - posts: typing.Optional[snscrape.base.IntWithGranularity] = None 86 - photos: typing.Optional[snscrape.base.IntWithGranularity] = None 87 - tags: typing.Optional[snscrape.base.IntWithGranularity] = None 88 - following: typing.Optional[snscrape.base.IntWithGranularity] = None 89 - 90 - followersGranularity = snscrape.base._DeprecatedProperty('followersGranularity', lambda self: self.followers.granularity, 'followers.granularity') 91 - postsGranularity = snscrape.base._DeprecatedProperty('postsGranularity', lambda self: self.posts.granularity, 'posts.granularity') 92 - photosGranularity = snscrape.base._DeprecatedProperty('photosGranularity', lambda self: self.photos.granularity, 'photos.granularity') 93 - tagsGranularity = snscrape.base._DeprecatedProperty('tagsGranularity', lambda self: self.tags.granularity, 'tags.granularity') 94 - followingGranularity = snscrape.base._DeprecatedProperty('followingGranularity', lambda self: self.following.granularity, 'following.granularity') 95 - 96 - def __str__(self): 97 - return f'https://vk.com/{self.username}' 98 - 99 - 100 101 class VKontakteUserScraper(snscrape.base.Scraper): 101 102 name = 'vkontakte-user' 102 103 skipped 113 lines 216 217 photoUrl = f'https://vk.com{a["href"]}' if 'href' in a.attrs and a['href'].startswith('/photo') and a['href'][6:].strip('0123456789-_') == '' else None 217 218 photos.append(Photo(variants = photoVariants, url = photoUrl)) 218 219 quotedPost = self._post_div_to_item(quoteDiv, isCopy = True) if (quoteDiv := post.find('div', class_ = 'copy_quote')) else None 220 + authorHeading = post.find('h5', class_ = ['post_author', 'copy_post_author']) 221 + authorLink = authorHeading.find('a', class_ = ['author', 'copy_author']) 222 + username = authorLink['href'].split('/')[-1] 223 + name = authorLink.text 224 + if authorHeading.find('div', class_ = 'page_verified') is not None: 225 + verified = True 226 + else: 227 + verified = False 228 + user = User(username = username, name = name, verified = verified) 219 229 return VKontaktePost( 220 - url = url, 221 - date = self._date_span_to_date(dateSpan), 222 - content = textDiv.text if textDiv else None, 223 - outlinks = outlinks or None, 224 - photos = photos or None, 225 - video = video or None, 226 - quotedPost = quotedPost, 230 + url = url, 231 + date = self._date_span_to_date(dateSpan), 232 + content = textDiv.text if textDiv else None, 233 + user = user, 234 + outlinks = outlinks or None, 235 + photos = photos or None, 236 + video = video or None, 237 + quotedPost = quotedPost, 227 238 ) 228 239 229 240 def _soup_to_items(self, soup): skipped 149 lines 379 390 # On public pages, this is where followers are listed 380 391 if (followersDiv := soup.find('div', id = 'public_followers')): 381 392 if (topDiv := followersDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Followers': 393 + kwargs['followers'] = snscrape.base.IntWithGranularity(*parse_num(topDiv.find('span', class_ = 'header_count').text)) 394 + # On community groups, this is where followers are listed 395 + elif (followersDiv := soup.find('div', class_ = 'group_friends_text')): 396 + kwargs['followers'] = snscrape.base.IntWithGranularity(*parse_num(followersDiv.find('span', class_ = 'group_friends_count').text)) 397 + # On public groups, this is where followers are listed 398 + elif (followersDiv := soup.find('div', id = 'group_followers')): 399 + if (topDiv := followersDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Members': 382 400 kwargs['followers'] = snscrape.base.IntWithGranularity(*parse_num(topDiv.find('span', class_ = 'header_count').text)) 383 401 384 402 return User(**kwargs) skipped 9 lines