Projects STRLCPY maigret Commits 53f72eda
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    maigret/maigret.py
    skipped 12 lines
    13 13  from .checking import *
    14 14  from .notify import QueryNotifyPrint
    15 15  from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
    16  - generate_report_context, save_txt_report
     16 + generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, check_supported_json_format, \
     17 + save_json_report
    17 18  from .submit import submit_dialog
    18 19   
    19 20  __version__ = '0.1.13'
    skipped 36 lines
    56 57   action="store", dest="proxy", default=None,
    57 58   help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
    58 59   )
    59  - parser.add_argument("--json", "-j", metavar="JSON_FILE",
    60  - dest="json_file", default=None,
    61  - help="Load data from a JSON file or an online, valid, JSON file.")
     60 + parser.add_argument("--db", metavar="DB_FILE",
     61 + dest="db_file", default=None,
     62 + help="Load Maigret database from a JSON file or an online, valid, JSON file.")
    62 63   parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE",
    63 64   dest="cookie_file", default=None,
    64 65   help="File with cookies.")
    skipped 26 lines
    91 92   action="store_true", dest="print_check_errors", default=False,
    92 93   help="Print errors messages: connection, captcha, site country ban, etc."
    93 94   )
    94  - parser.add_argument("--submit",
     95 + parser.add_argument("--submit", metavar='EXISTING_USER_URL',
    95 96   type=str, dest="new_site_to_submit", default=False,
    96 97   help="URL of existing profile in new site to submit."
    97 98   )
    skipped 60 lines
    158 159   dest="pdf", default=False,
    159 160   help="Generate a PDF report (general report on all usernames)."
    160 161   )
     162 + parser.add_argument("-J", "--json",
     163 + action="store", metavar='REPORT_TYPE',
     164 + dest="json", default='', type=check_supported_json_format,
     165 + help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
     166 + " (one report per username)."
     167 + )
    161 168   
    162 169   args = parser.parse_args()
    163 170   
    skipped 42 lines
    206 213   if args.tags:
    207 214   args.tags = list(set(str(args.tags).split(',')))
    208 215   
    209  - if args.json_file is None:
    210  - args.json_file = \
     216 + if args.db_file is None:
     217 + args.db_file = \
    211 218   os.path.join(os.path.dirname(os.path.realpath(__file__)),
    212 219   "resources/data.json"
    213 220   )
    skipped 9 lines
    223 230   color=not args.no_color)
    224 231   
    225 232   # Create object with all information about sites we are aware of.
    226  - db = MaigretDatabase().load_from_file(args.json_file)
     233 + db = MaigretDatabase().load_from_file(args.db_file)
    227 234   get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
    228 235   names=args.site_list,
    229 236   disabled=False, id_type=x)
    skipped 3 lines
    233 240   if args.new_site_to_submit:
    234 241   is_submitted = await submit_dialog(db, args.new_site_to_submit)
    235 242   if is_submitted:
    236  - db.save_to_file(args.json_file)
     243 + db.save_to_file(args.db_file)
    237 244   
    238 245   # Database self-checking
    239 246   if args.self_check:
    skipped 1 lines
    241 248   is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
    242 249   if is_need_update:
    243 250   if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
    244  - db.save_to_file(args.json_file)
     251 + db.save_to_file(args.db_file)
    245 252   print('Database was successfully updated.')
    246 253   else:
    247 254   print('Updates will be applied only for current search session.')
    skipped 91 lines
    339 346   save_txt_report(filename, username, results)
    340 347   query_notify.warning(f'TXT report for {username} saved in {filename}')
    341 348   
     349 + if args.json:
     350 + filename = report_filepath_tpl.format(username=username, postfix=f'_{args.json}.json')
     351 + save_json_report(filename, username, results, report_type=args.json)
     352 + query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
     353 + 
     354 + 
    342 355   # reporting for all the result
    343 356   if general_results:
    344 357   if args.html or args.pdf:
    skipped 12 lines
    357 370   save_pdf_report(filename, report_context)
    358 371   query_notify.warning(f'PDF report on all usernames saved in {filename}')
    359 372   # update database
    360  - db.save_to_file(args.json_file)
     373 + db.save_to_file(args.db_file)
    361 374   
    362 375   
    363 376  def run():
    skipped 11 lines
  • ■ ■ ■ ■ ■ ■
    maigret/report.py
    1 1  import csv
     2 +import json
    2 3  import io
    3 4  import logging
    4 5  import os
    skipped 2 lines
    7 8  from datetime import datetime
    8 9  from jinja2 import Template
    9 10  from xhtml2pdf import pisa
     11 +from argparse import ArgumentTypeError
    10 12  from dateutil.parser import parse as parse_datetime_str
    11 13   
    12 14  from .result import QueryStatus
    13 15  from .utils import is_country_tag, CaseConverter, enrich_link_str
     16 + 
     17 +SUPPORTED_JSON_REPORT_FORMATS = [
     18 + 'simple',
     19 + 'ndjson',
     20 +]
    14 21   
    15 22   
    16 23  '''
    skipped 33 lines
    50 57   filled_template = template.render(**context)
    51 58   with open(filename, 'w+b') as f:
    52 59   pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
     60 + 
     61 +def save_json_report(filename: str, username: str, results: dict, report_type: str):
     62 + with open(filename, 'w', encoding='utf-8') as f:
     63 + generate_json_report(username, results, f, report_type=report_type)
    53 64   
    54 65   
    55 66  '''
    skipped 169 lines
    225 236   file.write(dictionary["url_user"] + "\n")
    226 237   file.write(f'Total Websites Username Detected On : {exists_counter}')
    227 238   
     239 + 
     240 +def generate_json_report(username: str, results: dict, file, report_type):
     241 + exists_counter = 0
     242 + is_report_per_line = report_type.startswith('ndjson')
     243 + all_json = {}
     244 + 
     245 + for sitename in results:
     246 + site_result = results[sitename]
     247 + # TODO: fix no site data issue
     248 + if not site_result or site_result.get("status").status != QueryStatus.CLAIMED:
     249 + continue
     250 + 
     251 + data = dict(site_result)
     252 + data['status'] = data['status'].json()
     253 + 
     254 + if is_report_per_line:
     255 + data['sitename'] = sitename
     256 + file.write(json.dumps(data)+'\n')
     257 + else:
     258 + all_json[sitename] = data
     259 + 
     260 + if not is_report_per_line:
     261 + file.write(json.dumps(all_json))
     262 + 
    228 263  '''
    229 264  XMIND 8 Functions
    230 265  '''
    skipped 74 lines
    305 340   currentsublabel = undefinedsection.addSubTopic()
    306 341   currentsublabel.setTitle("%s: %s" % (k, v))
    307 342   
     343 + 
     344 +def check_supported_json_format(value):
     345 + if value and not value in SUPPORTED_JSON_REPORT_FORMATS:
     346 + raise ArgumentTypeError(f'JSON report type must be one of the following types: '
     347 + + ', '.join(SUPPORTED_JSON_REPORT_FORMATS))
     348 + return value
    308 349   
    309 350   
  • ■ ■ ■ ■ ■ ■
    maigret/resources/data.json
    skipped 23095 lines
    23096 23096   "urlMain": "https://www.are.na",
    23097 23097   "usernameClaimed": "nate-cassel",
    23098 23098   "usernameUnclaimed": "noonewouldeverusethis7"
     23099 + },
     23100 + "mywishboard.com": {
     23101 + "checkType": "message",
     23102 + "presenseStrs": [
     23103 + "profile-header",
     23104 + " profile-header__col"
     23105 + ],
     23106 + "absenceStrs": [
     23107 + "This page could not be found"
     23108 + ],
     23109 + "url": "https://mywishboard.com/@{username}",
     23110 + "urlMain": "https://mywishboard.com",
     23111 + "usernameClaimed": "alex",
     23112 + "usernameUnclaimed": "noonewouldeverusethis7"
     23113 + },
     23114 + "crafta.ua": {
     23115 + "checkType": "message",
     23116 + "presenseStrs": [
     23117 + "cft-profile-about"
     23118 + ],
     23119 + "absenceStrs": [
     23120 + "Page not found"
     23121 + ],
     23122 + "url": "https://{username}.crafta.ua/",
     23123 + "urlMain": "https://crafta.ua",
     23124 + "usernameClaimed": "test",
     23125 + "usernameUnclaimed": "noonewouldeverusethis7"
     23126 + },
     23127 + "m.smutty.com": {
     23128 + "tags": [
     23129 + "erotic"
     23130 + ],
     23131 + "checkType": "message",
     23132 + "presenseStrs": [
     23133 + "profile_stats_n"
     23134 + ],
     23135 + "absenceStrs": [
     23136 + "Not Found</span>"
     23137 + ],
     23138 + "url": "https://m.smutty.com/user/{username}/",
     23139 + "urlMain": "https://m.smutty.com",
     23140 + "usernameClaimed": "alex",
     23141 + "usernameUnclaimed": "noonewouldeverusethis7"
     23142 + },
     23143 + "www.marykay.ru": {
     23144 + "checkType": "message",
     23145 + "presenseStrs": [
     23146 + "email"
     23147 + ],
     23148 + "absenceStrs": [
     23149 + "errorPage"
     23150 + ],
     23151 + "url": "https://www.marykay.ru/{username}",
     23152 + "urlMain": "https://www.marykay.ru",
     23153 + "usernameClaimed": "anna",
     23154 + "usernameUnclaimed": "noonewouldeverusethis7"
    23099 23155   }
    23100 23156   },
    23101 23157   "engines": {
    skipped 103 lines
  • ■ ■ ■ ■ ■
    maigret/result.py
    1  -"""Sherlock Result Module
     1 +"""Maigret Result Module
    2 2   
    3 3  This module defines various objects for recording the results of queries.
    4 4  """
    skipped 69 lines
    74 74   self.ids_data = ids_data
    75 75   self.tags = tags
    76 76   
    def json(self):
        """Return the fields of this result as a plain dictionary.

        The status is converted with ``str()``, and ``ids`` falls back to an
        empty dict when ``ids_data`` is falsy.
        """
        return dict(
            username=self.username,
            site_name=self.site_name,
            url=self.site_url_user,
            status=str(self.status),
            ids=self.ids_data or {},
            tags=self.tags,
        )
    77 86   
    78 87   def __str__(self):
    79 88   """Convert Object To String.
    skipped 15 lines
  • ■ ■ ■ ■ ■
    maigret/submit.py
    1 1  import difflib
     2 +import json
    2 3   
    3 4  import requests
    4 5  from mock import Mock
    skipped 5 lines
    10 11   
    11 12  RATIO = 0.6
    12 13  TOP_FEATURES = 5
     14 +URL_RE = re.compile(r'https?://(www\.)?')
    13 15   
    14 16   
    15 17  def get_match_ratio(x):
    skipped 68 lines
    84 86   
    85 87   
    86 88  async def submit_dialog(db, url_exists):
     89 + domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
     90 + domain_raw = domain_raw.split('/')[0]
     91 + 
     92 + matched_sites = list(filter(lambda x: domain_raw in x.url_main+x.url, db.sites))
     93 + if matched_sites:
     94 + print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
     95 + status = lambda s: '(disabled)' if s.disabled else ''
     96 + url_block = lambda s: f'\n\t{s.url_main}\n\t{s.url}'
     97 + print('\n'.join([f'{site.name} {status(site)}{url_block(site)}' for site in matched_sites]))
     98 + return False
     99 + 
    87 100   url_parts = url_exists.split('/')
    88 101   supposed_username = url_parts[-1]
    89 102   new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
    skipped 13 lines
    103 116   a_minus_b = tokens_a.difference(tokens_b)
    104 117   b_minus_a = tokens_b.difference(tokens_a)
    105 118   
    106  - top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: '))
    107  - if not top_features_count:
    108  - top_features_count = TOP_FEATURES
     119 + top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
    109 120   
    110 121   presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]
    111 122   
    skipped 51 lines
  • ■ ■ ■ ■ ■ ■
    tests/test_report.py
    1 1  """Maigret reports test functions"""
    2 2  import copy
     3 +import json
    3 4  import os
    4 5  from io import StringIO
    5 6   
    skipped 1 lines
    7 8  from jinja2 import Template
    8 9   
    9 10  from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \
    10  - save_pdf_report, generate_report_template, generate_report_context
     11 + save_pdf_report, generate_report_template, generate_report_context, generate_json_report
    11 12  from maigret.result import QueryResult, QueryStatus
    12 13   
    13 14  EXAMPLE_RESULTS = {
    skipped 130 lines
    144 145   'https://www.github.com/test\n',
    145 146   'Total Websites Username Detected On : 1',
    146 147   ]
     148 + 
     149 + 
     150 +def test_generate_json_simple_report():
     151 + jsonfile = StringIO()
     152 + MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
     153 + MODIFIED_RESULTS['GitHub2'] = EXAMPLE_RESULTS['GitHub']
     154 + generate_json_report('test', MODIFIED_RESULTS, jsonfile, 'simple')
     155 + 
     156 + jsonfile.seek(0)
     157 + data = jsonfile.readlines()
     158 + 
     159 + assert len(data) == 1
     160 + assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2']
     161 + 
     162 + 
     163 +def test_generate_json_ndjson_report():
     164 + jsonfile = StringIO()
     165 + MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
     166 + MODIFIED_RESULTS['GitHub2'] = EXAMPLE_RESULTS['GitHub']
     167 + generate_json_report('test', MODIFIED_RESULTS, jsonfile, 'ndjson')
     168 + 
     169 + jsonfile.seek(0)
     170 + data = jsonfile.readlines()
     171 + 
     172 + assert len(data) == 2
     173 + assert json.loads(data[0])['sitename'] == 'GitHub'
    147 174   
    148 175   
    149 176  def test_save_xmind_report():
    skipped 35 lines
Please wait...
Page is in error, reload to recover