Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing is complete)
Total 1 files
■ ■ ■ ■ ■ ■
snscrape/modules/facebook.py
skipped 79 lines
80
80
return False, None
81
81
82
82
def _soup_to_items(self, soup, baseUrl, mode):
83
+
cleanUrl = None # Value from previous iteration is used for warning on link-less entries
83
84
for entry in soup.find_all('div', class_ = '_5pcr'): # also class 'fbUserContent' in 2017 and 'userContentWrapper' in 2019
84
85
entryA = entry.find('a', class_ = '_5pcq') # There can be more than one, e.g. when a post is shared by another user, but the first one is always the one of this entry.
85
86
mediaSetA = entry.find('a', class_ = '_17z-')
skipped 10 lines
96
97
logger.warning(f'Ignoring odd link: {href}')
97
98
continue
98
99
dirtyUrl = urllib.parse.urljoin(baseUrl, href)
100
+
cleanUrl = self._clean_url(dirtyUrl)
99
101
date = datetime.datetime.fromtimestamp(int(entry.find('abbr', class_ = '_5ptz')['data-utime']), datetime.timezone.utc)
100
102
contentDiv = entry.find('div', class_ = '_5pbx')
101
103
if contentDiv:
skipped 14 lines
116
118
outlink = query['u'][0]
117
119
if outlink.startswith('http://') or outlink.startswith('https://') and outlink not in outlinks: