Projects STRLCPY maigret Commits 9858e713
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate once indexing is complete)
  • ■ ■ ■ ■ ■ ■
    maigret/checking.py
    skipped 53 lines
    54 54   decoded_content = response_content.decode(charset, "ignore")
    55 55   html_text = decoded_content
    56 56   
     57 + error = None
    57 58   if status_code == 0:
    58 59   error = CheckError("Connection lost")
    59  - else:
    60  - error = None
    61 60   
    62 61   logger.debug(html_text)
    63 62   
    skipped 9 lines
    73 72   error = CheckError("Interrupted")
    74 73   except Exception as e:
    75 74   # python-specific exceptions
    76  - if sys.version_info.minor > 6:
    77  - if isinstance(e, ssl.SSLCertVerificationError) or isinstance(
    78  - e, ssl.SSLError
    79  - ):
    80  - error = CheckError("SSL", str(e))
     75 + if sys.version_info.minor > 6 and (
     76 + isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError)
     77 + ):
     78 + error = CheckError("SSL", str(e))
    81 79   else:
    82 80   logger.debug(e, exc_info=True)
    83 81   error = CheckError("Unexpected", str(e))
    skipped 25 lines
    109 107   return None
    110 108   
    111 109   
     110 +def debug_response_logging(url, html_text, status_code, check_error):
     111 + with open("debug.log", "a") as f:
     112 + status = status_code or "No response"
     113 + f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
     114 + if html_text:
     115 + f.write(f"code: {status}\nresponse: {str(html_text)}\n")
     116 + 
     117 + 
    112 118  def process_site_result(
    113 119   response, query_notify, logger, results_info: QueryResultWrapper, site: MaigretSite
    114 120  ):
    skipped 27 lines
    142 148   response_time = None
    143 149   
    144 150   if logger.level == logging.DEBUG:
    145  - with open("debug.txt", "a") as f:
    146  - status = status_code or "No response"
    147  - f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
    148  - if html_text:
    149  - f.write(f"code: {status}\nresponse: {str(html_text)}\n")
     151 + debug_response_logging(url, html_text, status_code, check_error)
    150 152   
    151 153   # additional check for errors
    152 154   if status_code and not check_error:
    skipped 1 lines
    154 156   html_text, status_code, site.errors, site.ignore403
    155 157   )
    156 158   
    157  - if site.activation and html_text:
    158  - is_need_activation = any(
    159  - [s for s in site.activation["marks"] if s in html_text]
    160  - )
    161  - if is_need_activation:
    162  - method = site.activation["method"]
    163  - try:
    164  - activate_fun = getattr(ParsingActivator(), method)
    165  - # TODO: async call
    166  - activate_fun(site, logger)
    167  - except AttributeError:
    168  - logger.warning(
    169  - f"Activation method {method} for site {site.name} not found!"
    170  - )
    171  - except Exception as e:
    172  - logger.warning(f"Failed activation {method} for site {site.name}: {str(e)}", exc_info=True)
    173  - # TODO: temporary check error
     159 + # parsing activation
     160 + is_need_activation = any(
     161 + [s for s in site.activation.get("marks", []) if s in html_text]
     162 + )
     163 + 
     164 + if site.activation and html_text and is_need_activation:
     165 + method = site.activation["method"]
     166 + try:
     167 + activate_fun = getattr(ParsingActivator(), method)
     168 + # TODO: async call
     169 + activate_fun(site, logger)
     170 + except AttributeError:
     171 + logger.warning(
     172 + f"Activation method {method} for site {site.name} not found!"
     173 + )
     174 + except Exception as e:
     175 + logger.warning(
     176 + f"Failed activation {method} for site {site.name}: {str(e)}",
     177 + exc_info=True,
     178 + )
     179 + # TODO: temporary check error
    174 180   
    175 181   site_name = site.pretty_name
    176 182   # presense flags
    177 183   # True by default
    178 184   presense_flags = site.presense_strs
    179 185   is_presense_detected = False
     186 + 
    180 187   if html_text:
    181 188   if not presense_flags:
    182 189   is_presense_detected = True
    skipped 79 lines
    262 269   results_info["ids_usernames"] = new_usernames
    263 270   results_info["ids_links"] = eval(extracted_ids_data.get("links", "[]"))
    264 271   result.ids_data = extracted_ids_data
    265  - 
    266  - # Notify caller about results of query.
    267  - query_notify.update(result, site.similar_search)
    268 272   
    269 273   # Save status of request
    270 274   results_info["status"] = result
    skipped 142 lines
    413 417   response, query_notify, logger, default_result, site
    414 418   )
    415 419   
     420 + query_notify.update(response_result['status'], site.similar_search)
     421 + 
    416 422   return site.name, response_result
    417 423   
    418 424   
    skipped 198 lines
    617 623   "disabled": False,
    618 624   }
    619 625   
    620  - try:
    621  - check_data = [
    622  - (site.username_claimed, QueryStatus.CLAIMED),
    623  - (site.username_unclaimed, QueryStatus.AVAILABLE),
    624  - ]
    625  - except Exception as e:
    626  - logger.error(e)
    627  - logger.error(site.__dict__)
    628  - check_data = []
     626 + check_data = [
     627 + (site.username_claimed, QueryStatus.CLAIMED),
     628 + (site.username_unclaimed, QueryStatus.AVAILABLE),
     629 + ]
    629 630   
    630 631   logger.info(f"Checking {site.name}...")
    631 632   
    skipped 94 lines
  • ■ ■ ■ ■ ■
    maigret/errors.py
    skipped 53 lines
    54 54   'Censorship', 'MGTS'
    55 55   ),
    56 56   'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
    57  - 'Сайт заблокирован хостинг-провайдером': CheckError('Site-specific', 'Site is disabled (Beget)'),
     57 + 'Сайт заблокирован хостинг-провайдером': CheckError(
     58 + 'Site-specific', 'Site is disabled (Beget)'
     59 + ),
    58 60  }
    59 61   
    60 62  ERRORS_TYPES = {
    skipped 68 lines
  • ■ ■ ■ ■ ■ ■
    maigret/notify.py
    skipped 151 lines
    152 152   
    153 153   return
    154 154   
     155 + def make_colored_terminal_notify(
     156 + self, status, text, status_color, text_color, appendix
     157 + ):
     158 + text = [
     159 + f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]"
     160 + + f"{text_color} {text}: {Style.RESET_ALL}"
     161 + + f"{appendix}"
     162 + ]
     163 + return "".join(text)
     164 + 
     165 + def make_simple_terminal_notify(
     166 + self, status, text, status_color, text_color, appendix
     167 + ):
     168 + return f"[{status}] {text}: {appendix}"
     169 + 
     170 + def make_terminal_notify(self, *args):
     171 + if self.color:
     172 + return self.make_colored_terminal_notify(*args)
     173 + else:
     174 + return self.make_simple_terminal_notify(*args)
     175 + 
    155 176   def start(self, message, id_type):
    156 177   """Notify Start.
    157 178   
    skipped 46 lines
    204 225   Return Value:
    205 226   Nothing.
    206 227   """
     228 + notify = None
    207 229   self.result = result
    208 230   
    209  - if not self.result.ids_data:
    210  - ids_data_text = ""
    211  - else:
     231 + ids_data_text = ""
     232 + if self.result.ids_data:
    212 233   ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ")
    213 234   
    214  - def make_colored_terminal_notify(
    215  - status, text, status_color, text_color, appendix
    216  - ):
    217  - text = [
    218  - f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]"
    219  - + f"{text_color} {text}: {Style.RESET_ALL}"
    220  - + f"{appendix}"
    221  - ]
    222  - return "".join(text)
    223  - 
    224  - def make_simple_terminal_notify(status, text, appendix):
    225  - return f"[{status}] {text}: {appendix}"
    226  - 
    227  - def make_terminal_notify(is_colored=True, *args):
    228  - if is_colored:
    229  - return make_colored_terminal_notify(*args)
    230  - else:
    231  - return make_simple_terminal_notify(*args)
    232  - 
    233  - notify = None
    234  - 
    235 235   # Output to the terminal is desired.
    236 236   if result.status == QueryStatus.CLAIMED:
    237 237   color = Fore.BLUE if is_similar else Fore.GREEN
    238 238   status = "?" if is_similar else "+"
    239  - notify = make_terminal_notify(
    240  - self.color,
     239 + notify = self.make_terminal_notify(
    241 240   status,
    242 241   result.site_name,
    243 242   color,
    skipped 2 lines
    246 245   )
    247 246   elif result.status == QueryStatus.AVAILABLE:
    248 247   if not self.print_found_only:
    249  - notify = make_terminal_notify(
    250  - self.color,
     248 + notify = self.make_terminal_notify(
    251 249   "-",
    252 250   result.site_name,
    253 251   Fore.RED,
    skipped 2 lines
    256 254   )
    257 255   elif result.status == QueryStatus.UNKNOWN:
    258 256   if not self.skip_check_errors:
    259  - notify = make_terminal_notify(
    260  - self.color,
     257 + notify = self.make_terminal_notify(
    261 258   "?",
    262 259   result.site_name,
    263 260   Fore.RED,
    skipped 3 lines
    267 264   elif result.status == QueryStatus.ILLEGAL:
    268 265   if not self.print_found_only:
    269 266   text = "Illegal Username Format For This Site!"
    270  - notify = make_terminal_notify(
    271  - self.color,
     267 + notify = self.make_terminal_notify(
    272 268   "-",
    273 269   result.site_name,
    274 270   Fore.RED,
    skipped 10 lines
    285 281   if notify:
    286 282   sys.stdout.write("\x1b[1K\r")
    287 283   print(notify)
    288  - 
    289  - return
    290 284   
    291 285   def __str__(self):
    292 286   """Convert Object To String.
    skipped 11 lines
  • ■ ■ ■ ■ ■ ■
    maigret/report.py
    skipped 292 lines
    293 293   os.remove(filename)
    294 294   workbook = xmind.load(filename)
    295 295   sheet = workbook.getPrimarySheet()
    296  - design_sheet(sheet, username, results)
     296 + design_xmind_sheet(sheet, username, results)
    297 297   xmind.save(workbook, path=filename)
    298 298   
    299 299   
    300  -def design_sheet(sheet, username, results):
     300 +def add_xmind_subtopic(userlink, k, v, supposed_data):
     301 + currentsublabel = userlink.addSubTopic()
     302 + field = "fullname" if k == "name" else k
     303 + if field not in supposed_data:
     304 + supposed_data[field] = []
     305 + supposed_data[field].append(v)
     306 + currentsublabel.setTitle("%s: %s" % (k, v))
     307 + 
     308 + 
     309 +def design_xmind_sheet(sheet, username, results):
    301 310   alltags = {}
    302 311   supposed_data = {}
    303 312   
    skipped 7 lines
    311 320   
    312 321   for website_name in results:
    313 322   dictionary = results[website_name]
     323 + result_status = dictionary.get("status")
     324 + if result_status.status != QueryStatus.CLAIMED:
     325 + continue
    314 326   
    315  - if dictionary.get("status").status == QueryStatus.CLAIMED:
    316  - # firsttime I found that entry
    317  - for tag in dictionary.get("status").tags:
    318  - if tag.strip() == "":
    319  - continue
    320  - if tag not in alltags.keys():
    321  - if not is_country_tag(tag):
    322  - tagsection = root_topic1.addSubTopic()
    323  - tagsection.setTitle(tag)
    324  - alltags[tag] = tagsection
     327 + stripped_tags = list(map(lambda x: x.strip(), result_status.tags))
     328 + normalized_tags = list(
     329 + filter(lambda x: x and not is_country_tag(x), stripped_tags)
     330 + )
    325 331   
    326  - category = None
    327  - for tag in dictionary.get("status").tags:
    328  - if tag.strip() == "":
    329  - continue
    330  - if not is_country_tag(tag):
    331  - category = tag
     332 + category = None
     333 + for tag in normalized_tags:
     334 + if tag in alltags.keys():
     335 + continue
     336 + tagsection = root_topic1.addSubTopic()
     337 + tagsection.setTitle(tag)
     338 + alltags[tag] = tagsection
     339 + category = tag
    332 340   
    333  - if category is None:
    334  - userlink = undefinedsection.addSubTopic()
    335  - userlink.addLabel(dictionary.get("status").site_url_user)
     341 + section = alltags[category] if category else undefinedsection
     342 + userlink = section.addSubTopic()
     343 + userlink.addLabel(result_status.site_url_user)
     344 + 
     345 + ids_data = result_status.ids_data or {}
     346 + for k, v in ids_data.items():
     347 + # suppose target data
     348 + if isinstance(v, list):
     349 + for currentval in v:
     350 + add_xmind_subtopic(userlink, k, currentval, supposed_data)
    336 351   else:
    337  - userlink = alltags[category].addSubTopic()
    338  - userlink.addLabel(dictionary.get("status").site_url_user)
     352 + add_xmind_subtopic(userlink, k, v, supposed_data)
    339 353   
    340  - if dictionary.get("status").ids_data:
    341  - for k, v in dictionary.get("status").ids_data.items():
    342  - # suppose target data
    343  - if not isinstance(v, list):
    344  - currentsublabel = userlink.addSubTopic()
    345  - field = "fullname" if k == "name" else k
    346  - if field not in supposed_data:
    347  - supposed_data[field] = []
    348  - supposed_data[field].append(v)
    349  - currentsublabel.setTitle("%s: %s" % (k, v))
    350  - else:
    351  - for currentval in v:
    352  - currentsublabel = userlink.addSubTopic()
    353  - field = "fullname" if k == "name" else k
    354  - if field not in supposed_data:
    355  - supposed_data[field] = []
    356  - supposed_data[field].append(currentval)
    357  - currentsublabel.setTitle("%s: %s" % (k, currentval))
    358 354   # add supposed data
    359  - filterede_supposed_data = filter_supposed_data(supposed_data)
    360  - if len(filterede_supposed_data) > 0:
     355 + filtered_supposed_data = filter_supposed_data(supposed_data)
     356 + if len(filtered_supposed_data) > 0:
    361 357   undefinedsection = root_topic1.addSubTopic()
    362 358   undefinedsection.setTitle("SUPPOSED DATA")
    363  - for k, v in filterede_supposed_data.items():
     359 + for k, v in filtered_supposed_data.items():
    364 360   currentsublabel = undefinedsection.addSubTopic()
    365 361   currentsublabel.setTitle("%s: %s" % (k, v))
    366 362   
    skipped 9 lines
  • ■ ■ ■ ■ ■ ■
    maigret/sites.py
    skipped 166 lines
    167 167   
    168 168   return result
    169 169   
     170 + def get_url_type(self) -> str:
     171 + url = URLMatcher.extract_main_part(self.url)
     172 + if url.startswith("{username}"):
     173 + url = "SUBDOMAIN"
     174 + elif url == "":
     175 + url = f"{self.url} ({self.engine})"
     176 + else:
     177 + parts = url.split("/")
     178 + url = "/" + "/".join(parts[1:])
     179 + return url
     180 + 
    170 181   def update(self, updates: "dict") -> "MaigretSite":
    171 182   self.__dict__.update(updates)
    172 183   self.update_detectors()
    skipped 232 lines
    405 416   if not sites_dict:
    406 417   sites_dict = self.sites_dict()
    407 418   
     419 + urls = {}
     420 + tags = {}
    408 421   output = ""
    409 422   disabled_count = 0
    410 423   total_count = len(sites_dict)
    411  - urls = {}
    412  - tags = {}
    413 424   
    414 425   for _, site in sites_dict.items():
    415 426   if site.disabled:
    416 427   disabled_count += 1
    417 428   
    418  - url = URLMatcher.extract_main_part(site.url)
    419  - if url.startswith("{username}"):
    420  - url = "SUBDOMAIN"
    421  - elif url == "":
    422  - url = f"{site.url} ({site.engine})"
    423  - else:
    424  - parts = url.split("/")
    425  - url = "/" + "/".join(parts[1:])
    426  - 
    427  - urls[url] = urls.get(url, 0) + 1
     429 + url_type = site.get_url_type()
     430 + urls[url_type] = urls.get(url_type, 0) + 1
    428 431   
    429 432   if not site.tags:
    430 433   tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
    431 434   
    432  - for tag in site.tags:
    433  - if is_country_tag(tag):
    434  - # currenty do not display country tags
    435  - continue
     435 + for tag in filter(lambda x: not is_country_tag(x), site.tags):
    436 436   tags[tag] = tags.get(tag, 0) + 1
    437 437   
    438 438   output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
    skipped 2 lines
    441 441   if count == 1:
    442 442   break
    443 443   output += f"{count}\t{url}\n"
     444 + 
    444 445   output += "Top sites' tags:\n"
    445  - for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True):
     446 + for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:20]:
    446 447   mark = ""
    447 448   if tag not in SUPPORTED_TAGS:
    448 449   mark = " (non-standard)"
    skipped 4 lines
Please wait...
The page encountered an error; reload to recover.