snscrape · commit 6b6ae3d3 (rename socialmediascraper to snscrape)
  • setup.py
    skipped 1 line

     setuptools.setup(
    -    name = 'socialmediascraper',
    +    name = 'snscrape',
         version = '0.0-dev',
    -    description = 'A social media scraper',
    -    packages = ['socialmediascraper'],
    +    description = 'A social network service scraper',
    +    packages = ['snscrape'],
         install_requires = ['requests', 'lxml', 'beautifulsoup4'],
         entry_points = {
             'console_scripts': [
    -            'smscrape = socialmediascraper.cli:main',
    +            'snscrape = snscrape.cli:main',
             ],
         },
     )
    skipped 1 line
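    The console_scripts change is what makes the rename visible to users: installing the package now produces a snscrape command instead of smscrape. The wrapper script setuptools generates for that entry point behaves roughly like this sketch (illustrative only, not the actual generated file):

        import sys
        from snscrape.cli import main

        if __name__ == '__main__':
            # setuptools-generated console scripts call the entry point
            # and pass its return value to sys.exit()
            sys.exit(main())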
  • socialmediascraper/__init__.py → snscrape/__init__.py
    Content is identical
  • socialmediascraper/base.py → snscrape/base.py
    Content is identical
  • socialmediascraper/cli.py → snscrape/cli.py
     import argparse
     import logging
    -import socialmediascraper.base
    -import socialmediascraper.modules
    +import snscrape.base
    +import snscrape.modules


     logger = logging.getLogger(__name__)
    skipped 7 lines
         parser.add_argument('-n', '--max-results', dest = 'maxResults', type = int, metavar = 'N', help = 'Only return the first N results')

         subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use')
    -    classes = socialmediascraper.base.Scraper.__subclasses__()
    +    classes = snscrape.base.Scraper.__subclasses__()
         for cls in classes:
             subparser = subparsers.add_parser(cls.name, formatter_class = argparse.ArgumentDefaultsHelpFormatter)
             cls.setup_parser(subparser)
    skipped 45 lines
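    Scraper.__subclasses__() only returns classes whose defining modules have already been imported, which is why cli.py imports snscrape.modules before building the subparsers. A self-contained sketch of the registration pattern (ExampleScraper is hypothetical, not part of this diff):

        import argparse

        class Scraper:
            name = None

            @classmethod
            def setup_parser(cls, subparser):
                pass

        class ExampleScraper(Scraper):  # hypothetical subclass for illustration
            name = 'example'

            @classmethod
            def setup_parser(cls, subparser):
                subparser.add_argument('query')

        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers(dest = 'scraper')
        for cls in Scraper.__subclasses__():  # sees only already-imported subclasses
            cls.setup_parser(subparsers.add_parser(cls.name))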
  • socialmediascraper/modules/__init__.py → snscrape/modules/__init__.py
     import importlib
     import os
    -import socialmediascraper.base
    +import snscrape.base


     def _import_modules():
    skipped 1 line
         for fn in files:
             if fn.endswith('.py') and fn != '__init__.py':
                 # Import module if not already imported
    -            moduleName = f'socialmediascraper.modules.{fn[:-3]}'
    +            moduleName = f'snscrape.modules.{fn[:-3]}'
                 module = importlib.import_module(moduleName)


    skipped 2 lines
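    _import_modules() is the other half of the registration scheme used by cli.py: it imports every module in the package directory so that each scraper subclass gets defined, and therefore discovered, at startup. The same pattern as a standalone sketch (import_all_submodules is a hypothetical name; the actual directory listing happens in the line skipped above):

        import importlib
        import os

        def import_all_submodules(packageName, packageDir):
            # Importing a module executes its class definitions, which is what
            # makes the scrapers visible to Scraper.__subclasses__().
            for fn in os.listdir(packageDir):
                if fn.endswith('.py') and fn != '__init__.py':
                    importlib.import_module(f'{packageName}.{fn[:-3]}')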
  • socialmediascraper/modules/facebook.py → snscrape/modules/facebook.py
    skipped 1 line
     import json
     import logging
     import re
    -import socialmediascraper.base
    +import snscrape.base
     import urllib.parse


     logger = logging.getLogger(__name__)


    -class FacebookUserScraper(socialmediascraper.base.Scraper):
    +class FacebookUserScraper(snscrape.base.Scraper):
         name = 'facebook-user'

         def __init__(self, username, **kwargs):
    skipped 63 lines
  • socialmediascraper/modules/googleplus.py → snscrape/modules/googleplus.py
    skipped 2 lines
     import json
     import logging
     import re
    -import socialmediascraper.base
    +import snscrape.base


     logger = logging.getLogger(__name__)


    -class GooglePlusUserScraper(socialmediascraper.base.Scraper):
    +class GooglePlusUserScraper(snscrape.base.Scraper):
         name = 'googleplus-user'

         def __init__(self, user, **kwargs):
    skipped 32 lines
                     logger.info('User has no posts')
                     return
                 for postObj in response[0][7]:
    -                yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
    +                yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
                 cursor = response[0][1] # 'ADSJ_x'
                 if cursor is None:
                     # No further pages
    skipped 31 lines
             response = json.JSONDecoder().raw_decode(''.join(garbage[pos:]))[0] # Parses only the first structure in the data stream without throwing an error about the extra data at the end

             for postObj in response[0][2]['74333095'][0][7]:
    -            yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
    +            yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')

             cursor = response[0][2]['74333095'][0][1]

    skipped 11 lines
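    The raw_decode call in the hunk above is the trick that makes this parsing work: unlike json.loads, JSONDecoder.raw_decode stops after the first complete JSON value instead of raising an error about trailing data. A minimal standalone illustration:

        import json

        data = '[1, 2, 3])]}\'\n'  # one JSON value followed by trailing junk
        value, end = json.JSONDecoder().raw_decode(data)
        print(value)       # [1, 2, 3]
        print(data[end:])  # the leftover junk, untouched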
  • socialmediascraper/modules/instagram.py → snscrape/modules/instagram.py
     import hashlib
     import json
     import logging
    -import socialmediascraper.base
    +import snscrape.base


     logger = logging.getLogger(__name__)


    -class InstagramUserScraper(socialmediascraper.base.Scraper):
    +class InstagramUserScraper(snscrape.base.Scraper):
         name = 'instagram-user'

         def __init__(self, username, **kwargs):
    skipped 3 lines
         def _response_to_items(self, response, username):
             for node in response['user']['edge_owner_to_timeline_media']['edges']:
                 code = node['node']['shortcode']
    -            yield socialmediascraper.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here?
    +            yield snscrape.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here?

         def get_items(self):
             headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    skipped 52 lines
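    _response_to_items walks Instagram's GraphQL edge structure and turns each node's shortcode into a post URL. The payload shape it expects looks roughly like this (the shortcode value here is made up):

        response = {'user': {'edge_owner_to_timeline_media': {'edges': [
            {'node': {'shortcode': 'BabcDEF1234'}},  # made-up shortcode
        ]}}}
        for edge in response['user']['edge_owner_to_timeline_media']['edges']:
            print(f"https://www.instagram.com/p/{edge['node']['shortcode']}/")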
  • socialmediascraper/modules/twitter.py → snscrape/modules/twitter.py
     import bs4
     import json
     import logging
    -import socialmediascraper.base
    +import snscrape.base


     logger = logging.getLogger(__name__)


    -class TwitterSearchScraper(socialmediascraper.base.Scraper):
    +class TwitterSearchScraper(snscrape.base.Scraper):
         name = 'twitter-search'

         def __init__(self, query, **kwargs):
    skipped 9 lines
             for tweet in feed:
                 username = tweet.find('span', 'username').find('b').text
                 tweetID = tweet['data-item-id']
    -            yield socialmediascraper.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')
    +            yield snscrape.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')

         def _check_json_callback(self, r):
             if r.headers['content-type'] != 'application/json;charset=utf-8':
    skipped 80 lines
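    _check_json_callback guards against Twitter answering with something other than JSON (e.g. an HTML error page) before the body is parsed. The same check as a standalone helper; the base class's callback contract is not shown in this diff, so treat the function shape as an assumption:

        import requests

        def looks_like_twitter_json(r: requests.Response) -> bool:
            # Twitter's search endpoint replies with exactly this Content-Type;
            # anything else should not be handed to the JSON parser.
            return r.headers.get('content-type') == 'application/json;charset=utf-8'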