Projects STRLCPY snscrape Commits 541173b0
🤬
  • ■ ■ ■ ■ ■ ■
    snscrape/modules/vkontakte.py
    skipped 55 lines
    56 56   logger.error('Private profile')
    57 57   return
    58 58   
     59 + profileDeleted = soup.find('h5', class_ = 'profile_deleted_text')
     60 + if profileDeleted:
     61 + # Unclear what this state represents, so just log website text.
     62 + logger.error(profileDeleted.text)
     63 + return
     64 + 
    59 65   newestPost = soup.find('div', class_ = 'post')
    60 66   if not newestPost:
    61 67   logger.info('Wall has no posts')
    skipped 18 lines
    80 86   if r.status_code != 200:
    81 87   logger.error(f'Got status code {r.status_code}')
    82 88   return
    83  - fields = r.content.split(b'<!>')
    84  - if fields[5].startswith(b'<div class="page_block no_posts">'):
     89 + # Convert to JSON and read the HTML payload. Note that this implicitly converts the data to a Python string (i.e., Unicode) rather than windows-1251-encoded bytes.
     90 + posts = r.json()['payload'][1][0]
     91 + if posts.startswith('<div class="page_block no_posts">'):
    85 92   # Reached the end
    86 93   break
    87  - if not fields[5].startswith(b'<div id="post'):
    88  - logger.error(f'Got an unknown response: {fields[5][:200]!r}...')
     94 + if not posts.startswith('<div id="post'):
     95 + logger.error(f'Got an unknown response: {posts[:200]!r}...')
    89 96   break
    90  - soup = bs4.BeautifulSoup(fields[5], 'lxml', from_encoding = r.encoding)
     97 + soup = bs4.BeautifulSoup(posts, 'lxml')
    91 98   yield from self._soup_to_items(soup, baseUrl)
    92 99   
    93 100   @classmethod
    skipped 8 lines
Please wait...
Page is in error, reload to recover