STRLCPY/snscrape

Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing has completed)

Total 1 file

■ ■ ■ ■ ■ ■

snscrape/modules/facebook.py

		skipped 79 lines
80	80		return False, None
81	81
82	82		def _soup_to_items(self, soup, baseUrl, mode):
	83	+	cleanUrl = None # Value from previous iteration is used for warning on link-less entries
83	84		for entry in soup.find_all('div', class_ = '_5pcr'): # also class 'fbUserContent' in 2017 and 'userContentWrapper' in 2019
84	85		entryA = entry.find('a', class_ = '_5pcq') # There can be more than one, e.g. when a post is shared by another user, but the first one is always the one of this entry.
85	86		mediaSetA = entry.find('a', class_ = '_17z-')
		skipped 10 lines
96	97		logger.warning(f'Ignoring odd link: {href}')
97	98		continue
98	99		dirtyUrl = urllib.parse.urljoin(baseUrl, href)
	100	+	cleanUrl = self._clean_url(dirtyUrl)
99	101		date = datetime.datetime.fromtimestamp(int(entry.find('abbr', class_ = '_5ptz')['data-utime']), datetime.timezone.utc)
100	102		contentDiv = entry.find('div', class_ = '_5pbx')
101	103		if contentDiv:
		skipped 14 lines
116	118		outlink = query['u'][0]
117	119		if outlink.startswith('http://') or outlink.startswith('https://') and outlink not in outlinks:
118	120		outlinks.append(outlink)
119		-	yield FacebookPost(cleanUrl = self._clean_url(dirtyUrl), dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks, outlinksss = ' '.join(outlinks))
	121	+	yield FacebookPost(cleanUrl = cleanUrl, dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks, outlinksss = ' '.join(outlinks))
120	122
121	123
122	124		class FacebookUserScraper(FacebookCommonScraper):
		skipped 123 lines