Projects STRLCPY snscrape Commits 20ea117a
🤬
  • ■ ■ ■ ■ ■ ■
    snscrape/modules/vkontakte.py
    skipped 79 lines
    80 80   if r.status_code != 200:
    81 81   logger.error(f'Got status code {r.status_code}')
    82 82   return
    83  - fields = r.content.split(b'<!>')
    84  - if fields[5].startswith(b'<div class="page_block no_posts">'):
     83 + # Convert to JSON and read the HTML payload. Note that this implicitly converts the data to a Python string (i.e., Unicode), away from windows-1251-encoded bytes.
     84 + posts = r.json()['payload'][1][0]
     85 + if posts.startswith('<div class="page_block no_posts">'):
    85 86   # Reached the end
    86 87   break
    87  - if not fields[5].startswith(b'<div id="post'):
    88  - logger.error(f'Got an unknown response: {fields[5][:200]!r}...')
     88 + if not posts.startswith('<div id="post'):
     89 + logger.error(f'Got an unknown response: {posts[:200]!r}...')
    89 90   break
    90  - soup = bs4.BeautifulSoup(fields[5], 'lxml', from_encoding = r.encoding)
     91 + soup = bs4.BeautifulSoup(posts, 'lxml')
    91 92   yield from self._soup_to_items(soup, baseUrl)
    92 93   
    93 94   @classmethod
    skipped 8 lines
Please wait...
Page is in error, reload to recover