snscrape · commit 6b6ae3d3 (rename socialmediascraper to snscrape)
  • setup.py
    skipped 1 line

     setuptools.setup(
    -    name = 'socialmediascraper',
    +    name = 'snscrape',
         version = '0.0-dev',
    -    description = 'A social media scraper',
    -    packages = ['socialmediascraper'],
    +    description = 'A social network service scraper',
    +    packages = ['snscrape'],
         install_requires = ['requests', 'lxml', 'beautifulsoup4'],
         entry_points = {
             'console_scripts': [
    -            'smscrape = socialmediascraper.cli:main',
    +            'snscrape = snscrape.cli:main',
             ],
         },
     )
    skipped 1 line
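    The console_scripts change is what makes the rename visible to users: installing the package now produces a snscrape command instead of smscrape. The wrapper script setuptools generates for that entry point behaves roughly like this sketch (illustrative only, not the actual generated file):

        import sys
        from snscrape.cli import main

        if __name__ == '__main__':
            # setuptools-generated console scripts call the entry point
            # and pass its return value to sys.exit()
            sys.exit(main())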
  • socialmediascraper/__init__.py → snscrape/__init__.py
    Content is identical
  • socialmediascraper/base.py → snscrape/base.py
    Content is identical
  • socialmediascraper/cli.py → snscrape/cli.py
     import argparse
     import logging
    -import socialmediascraper.base
    -import socialmediascraper.modules
    +import snscrape.base
    +import snscrape.modules


     logger = logging.getLogger(__name__)
    skipped 7 lines
         parser.add_argument('-n', '--max-results', dest = 'maxResults', type = int, metavar = 'N', help = 'Only return the first N results')

         subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use')
    -    classes = socialmediascraper.base.Scraper.__subclasses__()
    +    classes = snscrape.base.Scraper.__subclasses__()
         for cls in classes:
             subparser = subparsers.add_parser(cls.name, formatter_class = argparse.ArgumentDefaultsHelpFormatter)
             cls.setup_parser(subparser)
    skipped 45 lines
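    Scraper.__subclasses__() only returns classes whose defining modules have already been imported, which is why cli.py imports snscrape.modules before building the subparsers. A self-contained sketch of the registration pattern (ExampleScraper is hypothetical, not part of this diff):

        import argparse

        class Scraper:
            name = None

            @classmethod
            def setup_parser(cls, subparser):
                pass

        class ExampleScraper(Scraper):  # hypothetical subclass for illustration
            name = 'example'

            @classmethod
            def setup_parser(cls, subparser):
                subparser.add_argument('query')

        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers(dest = 'scraper')
        for cls in Scraper.__subclasses__():  # sees only already-imported subclasses
            cls.setup_parser(subparsers.add_parser(cls.name))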
  • socialmediascraper/modules/__init__.py → snscrape/modules/__init__.py
     import importlib
     import os
    -import socialmediascraper.base
    +import snscrape.base


     def _import_modules():
    skipped 1 line
         for fn in files:
             if fn.endswith('.py') and fn != '__init__.py':
                 # Import module if not already imported
    -            moduleName = f'socialmediascraper.modules.{fn[:-3]}'
    +            moduleName = f'snscrape.modules.{fn[:-3]}'
                 module = importlib.import_module(moduleName)


    skipped 2 lines
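    _import_modules() is the other half of the registration scheme used by cli.py: it imports every module in the package directory so that each scraper subclass gets defined, and therefore discovered, at startup. The same pattern as a standalone sketch (import_all_submodules is a hypothetical name; the actual directory listing happens in the line skipped above):

        import importlib
        import os

        def import_all_submodules(packageName, packageDir):
            # Importing a module executes its class definitions, which is what
            # makes the scrapers visible to Scraper.__subclasses__().
            for fn in os.listdir(packageDir):
                if fn.endswith('.py') and fn != '__init__.py':
                    importlib.import_module(f'{packageName}.{fn[:-3]}')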
  • socialmediascraper/modules/facebook.py → snscrape/modules/facebook.py
    skipped 1 line
     import json
     import logging
     import re
    -import socialmediascraper.base
    +import snscrape.base
     import urllib.parse


     logger = logging.getLogger(__name__)


    -class FacebookUserScraper(socialmediascraper.base.Scraper):
    +class FacebookUserScraper(snscrape.base.Scraper):
         name = 'facebook-user'

         def __init__(self, username, **kwargs):
    skipped 63 lines
  • socialmediascraper/modules/googleplus.py → snscrape/modules/googleplus.py
    skipped 2 lines
     import json
     import logging
     import re
    -import socialmediascraper.base
    +import snscrape.base


     logger = logging.getLogger(__name__)


    -class GooglePlusUserScraper(socialmediascraper.base.Scraper):
    +class GooglePlusUserScraper(snscrape.base.Scraper):
         name = 'googleplus-user'

         def __init__(self, user, **kwargs):
    skipped 32 lines
                     logger.info('User has no posts')
                     return
                 for postObj in response[0][7]:
    -                yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
    +                yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
                 cursor = response[0][1] # 'ADSJ_x'
                 if cursor is None:
                     # No further pages
    skipped 31 lines
             response = json.JSONDecoder().raw_decode(''.join(garbage[pos:]))[0] # Parses only the first structure in the data stream without throwing an error about the extra data at the end

             for postObj in response[0][2]['74333095'][0][7]:
    -            yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
    +            yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')

             cursor = response[0][2]['74333095'][0][1]

    skipped 11 lines
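    The raw_decode call in the hunk above is the trick that makes this parsing work: unlike json.loads, JSONDecoder.raw_decode stops after the first complete JSON value instead of raising an error about trailing data. A minimal standalone illustration:

        import json

        data = '[1, 2, 3])]}\'\n'  # one JSON value followed by trailing junk
        value, end = json.JSONDecoder().raw_decode(data)
        print(value)       # [1, 2, 3]
        print(data[end:])  # the leftover junk, untouched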
  • socialmediascraper/modules/instagram.py → snscrape/modules/instagram.py
     import hashlib
     import json
     import logging
    -import socialmediascraper.base
    +import snscrape.base


     logger = logging.getLogger(__name__)


    -class InstagramUserScraper(socialmediascraper.base.Scraper):
    +class InstagramUserScraper(snscrape.base.Scraper):
         name = 'instagram-user'

         def __init__(self, username, **kwargs):
    skipped 3 lines
         def _response_to_items(self, response, username):
             for node in response['user']['edge_owner_to_timeline_media']['edges']:
                 code = node['node']['shortcode']
    -            yield socialmediascraper.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here?
    +            yield snscrape.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here?

         def get_items(self):
             headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    skipped 52 lines
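    _response_to_items walks Instagram's GraphQL edge structure and turns each node's shortcode into a post URL. The payload shape it expects looks roughly like this (the shortcode value here is made up):

        response = {'user': {'edge_owner_to_timeline_media': {'edges': [
            {'node': {'shortcode': 'BabcDEF1234'}},  # made-up shortcode
        ]}}}
        for edge in response['user']['edge_owner_to_timeline_media']['edges']:
            print(f"https://www.instagram.com/p/{edge['node']['shortcode']}/")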
  • socialmediascraper/modules/twitter.py → snscrape/modules/twitter.py
     import bs4
     import json
     import logging
    -import socialmediascraper.base
    +import snscrape.base


     logger = logging.getLogger(__name__)


    -class TwitterSearchScraper(socialmediascraper.base.Scraper):
    +class TwitterSearchScraper(snscrape.base.Scraper):
         name = 'twitter-search'

         def __init__(self, query, **kwargs):
    skipped 9 lines
             for tweet in feed:
                 username = tweet.find('span', 'username').find('b').text
                 tweetID = tweet['data-item-id']
    -            yield socialmediascraper.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')
    +            yield snscrape.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')

         def _check_json_callback(self, r):
             if r.headers['content-type'] != 'application/json;charset=utf-8':
    skipped 80 lines
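    _check_json_callback guards against Twitter answering with something other than JSON (e.g. an HTML error page) before the body is parsed. The same check as a standalone helper; the base class's callback contract is not shown in this diff, so treat the function shape as an assumption:

        import requests

        def looks_like_twitter_json(r: requests.Response) -> bool:
            # Twitter's search endpoint replies with exactly this Content-Type;
            # anything else should not be handed to the JSON parser.
            return r.headers.get('content-type') == 'application/json;charset=utf-8'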