STRLCPY/snscrape

Total 1 file

■ ■ ■ ■ ■ ■

snscrape/modules/telegram.py

		skipped 223 lines
224	224		if '/s/' not in r.url:
225	225		_logger.warning('No public post list for this user')
226	226		return
	227	+	nextPageUrl = ''
227	228		while True:
228	229		yield from self._soup_to_items(soup, r.url)
	230	+	try:
	231	+	if soup.find('a', attrs = {'class': 'tgme_widget_message_date'}, href = True)['href'].split('/')[-1] == '1':
	232	+	# if message 1 is the first message in the page, terminate scraping
	233	+	break
	234	+	except:
	235	+	pass
229	236		pageLink = soup.find('a', attrs = {'class': 'tme_messages_more', 'data-before': True})
230	237		if not pageLink:
	238	+	# some pages are missing a "tme_messages_more" tag, causing early termination
	239	+	if '=' not in nextPageUrl:
	240	+	nextPageUrl = soup.find('link', attrs = {'rel': 'canonical'}, href = True)['href']
231	241		nextPostIndex = int(nextPageUrl.split('=')[-1]) - 20
232	242		if nextPostIndex > 20:
233	243		pageLink = {'href': nextPageUrl.split('=')[0] + f'={nextPostIndex}'}
		skipped 81 lines