Projects STRLCPY snscrape Commits 6df35177
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing has completed)
  • ■ ■ ■ ■ ■
    snscrape/modules/facebook.py
    skipped 79 lines
    80 80   return False, None
    81 81   
    82 82   def _soup_to_items(self, soup, baseUrl, mode):
     83 + cleanUrl = None # Value from previous iteration is used for warning on link-less entries
    83 84   for entry in soup.find_all('div', class_ = '_5pcr'): # also class 'fbUserContent' in 2017 and 'userContentWrapper' in 2019
    84 85   entryA = entry.find('a', class_ = '_5pcq') # There can be more than one, e.g. when a post is shared by another user, but the first one is always the one of this entry.
    85 86   mediaSetA = entry.find('a', class_ = '_17z-')
    skipped 10 lines
    96 97   logger.warning(f'Ignoring odd link: {href}')
    97 98   continue
    98 99   dirtyUrl = urllib.parse.urljoin(baseUrl, href)
     100 + cleanUrl = self._clean_url(dirtyUrl)
    99 101   date = datetime.datetime.fromtimestamp(int(entry.find('abbr', class_ = '_5ptz')['data-utime']), datetime.timezone.utc)
    100 102   contentDiv = entry.find('div', class_ = '_5pbx')
    101 103   if contentDiv:
    skipped 14 lines
    116 118   outlink = query['u'][0]
    117 119   if outlink.startswith('http://') or outlink.startswith('https://') and outlink not in outlinks:
    118 120   outlinks.append(outlink)
    119  - yield FacebookPost(cleanUrl = self._clean_url(dirtyUrl), dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks, outlinksss = ' '.join(outlinks))
     121 + yield FacebookPost(cleanUrl = cleanUrl, dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks, outlinksss = ' '.join(outlinks))
    120 122   
    121 123   
    122 124  class FacebookUserScraper(FacebookCommonScraper):
    skipped 123 lines
Please wait...
The page encountered an error; reload to recover.