| skipped 1 lines |
2 | 2 | | Maigret main module |
3 | 3 | | """ |
4 | 4 | | |
5 | | - | import asyncio |
6 | | - | import logging |
7 | 5 | | import os |
8 | 6 | | import platform |
9 | | - | import re |
10 | | - | import ssl |
11 | 7 | | import sys |
12 | 8 | | from argparse import ArgumentParser, RawDescriptionHelpFormatter |
13 | 9 | | |
14 | | - | import aiohttp |
15 | 10 | | import requests |
16 | | - | import tqdm.asyncio |
17 | | - | from aiohttp_socks import ProxyConnector |
18 | | - | from mock import Mock |
19 | | - | from python_socks import _errors as proxy_errors |
20 | | - | from socid_extractor import parse, extract, __version__ as socid_version |
| 11 | + | from socid_extractor import parse, __version__ as socid_version |
21 | 12 | | |
22 | | - | from .activation import ParsingActivator, import_aiohttp_cookies |
| 13 | + | from .checking import * |
23 | 14 | | from .notify import QueryNotifyPrint |
24 | 15 | | from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \ |
25 | 16 | | generate_report_context, save_txt_report |
26 | | - | from .result import QueryResult, QueryStatus |
27 | | - | from .sites import MaigretDatabase, MaigretSite |
| 17 | + | from .submit import submit_dialog |
28 | 18 | | |
29 | 19 | | __version__ = '0.1.13' |
30 | 20 | | |
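| | | # Identifier types (as extracted by socid_extractor) that Maigret can use |
| | | # for a recursive search; process_site_result() collects them below. |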
31 | | - | supported_recursive_search_ids = ( |
32 | | - | 'yandex_public_id', |
33 | | - | 'gaia_id', |
34 | | - | 'vk_id', |
35 | | - | 'ok_id', |
36 | | - | 'wikimapia_uid', |
37 | | - | ) |
38 | | - | |
39 | | - | common_errors = { |
40 | | - | '<title>Attention Required! | Cloudflare</title>': 'Cloudflare captcha', |
41 | | - | 'Please stand by, while we are checking your browser': 'Cloudflare captcha', |
42 | | - | '<title>Доступ ограничен</title>': 'Rostelecom censorship', |
43 | | - | 'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha', |
44 | | - | 'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection', |
45 | | - | '404</h1><p class="error-card__description">Мы не нашли страницу': 'MegaFon 404 page', |
46 | | - | 'Доступ к информационному ресурсу ограничен на основании Федерального закона': 'MGTS censorship', |
47 | | - | 'Incapsula incident ID': 'Incapsula antibot protection', |
48 | | - | } |
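| | | # The substrings above mark blocked or intercepted responses (captchas, |
| | | # provider censorship, anti-bot pages) rather than real profile pages; |
| | | # detect_error_page() below matches them against response bodies. |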
49 | | - | |
50 | | - | unsupported_characters = '#' |
51 | | - | |
52 | | - | async def get_response(request_future, site_name, logger): |
53 | | - | html_text = None |
54 | | - | status_code = 0 |
55 | | - | |
56 | | - | error_text = "General Unknown Error" |
57 | | - | expection_text = None |
58 | | - | |
59 | | - | try: |
60 | | - | response = await request_future |
61 | | - | |
62 | | - | status_code = response.status |
63 | | - | response_content = await response.content.read() |
64 | | - | charset = response.charset or 'utf-8' |
65 | | - | decoded_content = response_content.decode(charset, 'ignore') |
66 | | - | html_text = decoded_content |
67 | | - | |
68 | | - | if status_code > 0: |
69 | | - | error_text = None |
70 | | - | |
71 | | - | logger.debug(html_text) |
72 | | - | |
73 | | - | except asyncio.TimeoutError as errt: |
74 | | - | error_text = "Timeout Error" |
75 | | - | expection_text = str(errt) |
76 | | - | except (ssl.SSLCertVerificationError, ssl.SSLError) as err: |
77 | | - | error_text = "SSL Error" |
78 | | - | expection_text = str(err) |
79 | | - | except aiohttp.client_exceptions.ClientConnectorError as err: |
80 | | - | error_text = "Error Connecting" |
81 | | - | expection_text = str(err) |
82 | | - | except aiohttp.http_exceptions.BadHttpMessage as err: |
83 | | - | error_text = "HTTP Error" |
84 | | - | expection_text = str(err) |
85 | | - | except proxy_errors.ProxyError as err: |
86 | | - | error_text = "Proxy Error" |
87 | | - | expection_text = str(err) |
88 | | - | except Exception as err: |
89 | | - | logger.warning(f'Unhandled error while requesting {site_name}: {err}') |
90 | | - | logger.debug(err, exc_info=True) |
91 | | - | error_text = "Some Error" |
92 | | - | expection_text = str(err) |
93 | | - | |
94 | | - | # TODO: return only needed information |
95 | | - | return html_text, status_code, error_text, expection_text |
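| | | # Illustrative usage (hypothetical URL; `session` and `logger` as in maigret()): |
| | | #   future = session.get(url='https://example.com/u/alice') |
| | | #   html, status, error, exc = await get_response(future, 'example', logger) |
| | | # On success `error` is None; on failure `html` may be None while `error` and |
| | | # `exc` describe what went wrong. |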
96 | | - | |
97 | | - | |
98 | | - | async def update_site_dict_from_response(sitename, site_dict, results_info, semaphore, logger, query_notify): |
99 | | - | async with semaphore: |
100 | | - | site_obj = site_dict[sitename] |
101 | | - | future = site_obj.request_future |
102 | | - | if not future: |
103 | | - | # ignore: search by incompatible id type |
104 | | - | return |
105 | | - | |
106 | | - | response = await get_response(request_future=future, |
107 | | - | site_name=sitename, |
108 | | - | logger=logger) |
109 | | - | |
110 | | - | site_dict[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj) |
111 | | - | |
112 | | - | # TODO: move into separate module |
113 | | - | def detect_error_page(html_text, status_code, fail_flags, ignore_403): |
114 | | - | # Detect service restrictions such as a country restriction |
115 | | - | for flag, msg in fail_flags.items(): |
116 | | - | if flag in html_text: |
117 | | - | return 'Some site error', msg |
118 | | - | |
119 | | - | # Detect common restrictions such as provider censorship and bot protection |
120 | | - | for flag, msg in common_errors.items(): |
121 | | - | if flag in html_text: |
122 | | - | return 'Error', msg |
123 | | - | |
124 | | - | # Detect common site errors |
125 | | - | if status_code == 403 and not ignore_403: |
126 | | - | return 'Access denied', 'Access denied, use proxy/vpn' |
127 | | - | elif status_code >= 500: |
128 | | - | return f'Error {status_code}', f'Site error {status_code}' |
129 | | - | |
130 | | - | return None, None |
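| | | # Illustrative behavior: a body containing a known flag such as |
| | | # 'Please stand by, while we are checking your browser' yields |
| | | # ('Error', 'Cloudflare captcha'); a clean page with status 200 yields (None, None). |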
131 | | - | |
132 | | - | |
133 | | - | def process_site_result(response, query_notify, logger, results_info, site: MaigretSite): |
134 | | - | if not response: |
135 | | - | return results_info |
136 | | - | |
137 | | - | fulltags = site.tags |
138 | | - | |
139 | | - | # Retrieve the query info prepared earlier for this site |
140 | | - | username = results_info['username'] |
141 | | - | is_parsing_enabled = results_info['parsing_enabled'] |
142 | | - | url = results_info.get("url_user") |
143 | | - | logger.debug(url) |
144 | | - | |
145 | | - | status = results_info.get("status") |
146 | | - | if status is not None: |
147 | | - | # We have already determined the user doesn't exist here |
148 | | - | return results_info |
149 | | - | |
150 | | - | # Get the expected check type |
151 | | - | check_type = site.check_type |
152 | | - | |
153 | | - | # Get the failure messages and comments |
154 | | - | failure_errors = site.errors |
155 | | - | |
156 | | - | # TODO: refactor |
157 | | - | if not response: |
158 | | - | logger.error(f'No response for {site.name}') |
159 | | - | return results_info |
160 | | - | |
161 | | - | html_text, status_code, error_text, expection_text = response |
162 | | - | site_error_text = '?' |
163 | | - | |
164 | | - | # TODO: add elapsed request time counting |
165 | | - | response_time = None |
166 | | - | |
167 | | - | if logger.level == logging.DEBUG: |
168 | | - | with open('debug.txt', 'a') as f: |
169 | | - | status = status_code or 'No response' |
170 | | - | f.write(f'url: {url}\nerror: {str(error_text)}\nr: {status}\n') |
171 | | - | if html_text: |
172 | | - | f.write(f'code: {status}\nresponse: {str(html_text)}\n') |
173 | | - | |
174 | | - | if status_code and not error_text: |
175 | | - | error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors, |
176 | | - | site.ignore_403) |
177 | | - | |
178 | | - | if site.activation and html_text: |
179 | | - | is_need_activation = any(s in html_text for s in site.activation['marks']) |
180 | | - | if is_need_activation: |
181 | | - | method = site.activation['method'] |
182 | | - | try: |
183 | | - | activate_fun = getattr(ParsingActivator(), method) |
184 | | - | # TODO: async call |
185 | | - | activate_fun(site, logger) |
186 | | - | except AttributeError: |
187 | | - | logger.warning(f'Activation method {method} for site {site.name} not found!') |
188 | | - | |
189 | | - | # presence flags: if a site defines none, presence is assumed |
190 | | - | # (detected by default) |
191 | | - | presense_flags = site.presense_strs |
192 | | - | is_presense_detected = False |
193 | | - | if html_text: |
194 | | - | if not presense_flags: |
195 | | - | is_presense_detected = True |
196 | | - | site.stats['presense_flag'] = None |
197 | | - | else: |
198 | | - | for presense_flag in presense_flags: |
199 | | - | if presense_flag in html_text: |
200 | | - | is_presense_detected = True |
201 | | - | site.stats['presense_flag'] = presense_flag |
202 | | - | logger.info(presense_flag) |
203 | | - | break |
204 | | - | |
205 | | - | if error_text is not None: |
206 | | - | logger.debug(error_text) |
207 | | - | result = QueryResult(username, |
208 | | - | site.name, |
209 | | - | url, |
210 | | - | QueryStatus.UNKNOWN, |
211 | | - | query_time=response_time, |
212 | | - | context=f'{error_text}: {site_error_text}', tags=fulltags) |
213 | | - | elif check_type == "message": |
214 | | - | absence_flags = site.absence_strs |
215 | | - | is_absence_flags_list = isinstance(absence_flags, list) |
216 | | - | absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags} |
217 | | - | # Checks if the error message is in the HTML |
218 | | - | is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set]) |
219 | | - | if not is_absence_detected and is_presense_detected: |
220 | | - | result = QueryResult(username, |
221 | | - | site.name, |
222 | | - | url, |
223 | | - | QueryStatus.CLAIMED, |
224 | | - | query_time=response_time, tags=fulltags) |
225 | | - | else: |
226 | | - | result = QueryResult(username, |
227 | | - | site.name, |
228 | | - | url, |
229 | | - | QueryStatus.AVAILABLE, |
230 | | - | query_time=response_time, tags=fulltags) |
231 | | - | elif check_type == "status_code": |
232 | | - | # Checks if the status code of the response is 2XX |
233 | | - | if 200 <= status_code < 300 and is_presense_detected: |
234 | | - | result = QueryResult(username, |
235 | | - | site.name, |
236 | | - | url, |
237 | | - | QueryStatus.CLAIMED, |
238 | | - | query_time=response_time, tags=fulltags) |
239 | | - | else: |
240 | | - | result = QueryResult(username, |
241 | | - | site.name, |
242 | | - | url, |
243 | | - | QueryStatus.AVAILABLE, |
244 | | - | query_time=response_time, tags=fulltags) |
245 | | - | elif check_type == "response_url": |
246 | | - | # For this detection method, we have turned off the redirect. |
247 | | - | # So, there is no need to check the response URL: it will always |
248 | | - | # match the request. Instead, we will ensure that the response |
249 | | - | # code indicates that the request was successful (i.e. no 404, or |
250 | | - | # forward to some odd redirect). |
251 | | - | if 200 <= status_code < 300 and is_presense_detected: |
252 | | - | result = QueryResult(username, |
253 | | - | site.name, |
254 | | - | url, |
255 | | - | QueryStatus.CLAIMED, |
256 | | - | query_time=response_time, tags=fulltags) |
257 | | - | else: |
258 | | - | result = QueryResult(username, |
259 | | - | site.name, |
260 | | - | url, |
261 | | - | QueryStatus.AVAILABLE, |
262 | | - | query_time=response_time, tags=fulltags) |
263 | | - | else: |
264 | | - | # It should be impossible to ever get here... |
265 | | - | raise ValueError(f"Unknown check type '{check_type}' for " |
266 | | - | f"site '{site.name}'") |
267 | | - | |
268 | | - | extracted_ids_data = {} |
269 | | - | |
270 | | - | if is_parsing_enabled and result.status == QueryStatus.CLAIMED: |
271 | | - | try: |
272 | | - | extracted_ids_data = extract(html_text) |
273 | | - | except Exception as e: |
274 | | - | logger.warning(f'Error while parsing {site.name}: {e}', exc_info=True) |
275 | | - | |
276 | | - | if extracted_ids_data: |
277 | | - | new_usernames = {} |
278 | | - | for k, v in extracted_ids_data.items(): |
279 | | - | if 'username' in k: |
280 | | - | new_usernames[v] = 'username' |
281 | | - | if k in supported_recursive_search_ids: |
282 | | - | new_usernames[v] = k |
283 | | - | |
284 | | - | results_info['ids_usernames'] = new_usernames |
285 | | - | result.ids_data = extracted_ids_data |
286 | | - | |
287 | | - | # Notify caller about results of query. |
288 | | - | query_notify.update(result, site.similar_search) |
289 | | - | |
290 | | - | # Save status of request |
291 | | - | results_info['status'] = result |
292 | | - | |
293 | | - | # Save results from request |
294 | | - | results_info['http_status'] = status_code |
295 | | - | results_info['is_similar'] = site.similar_search |
296 | | - | # results_site['response_text'] = html_text |
297 | | - | results_info['rank'] = site.alexa_rank |
298 | | - | return results_info |
299 | | - | |
300 | | - | |
301 | | - | |
302 | | - | |
303 | | - | async def maigret(username, site_dict, query_notify, logger, |
304 | | - | proxy=None, timeout=None, recursive_search=False, |
305 | | - | id_type='username', debug=False, forced=False, |
306 | | - | max_connections=100, no_progressbar=False, |
307 | | - | cookies=None): |
308 | | - | """Main search function. |
309 | | - | |
310 | | - | Checks for existence of username on various social media sites. |
311 | | - | |
312 | | - | Keyword Arguments: |
313 | | - | username -- String indicating username that report |
314 | | - | should be created against. |
315 | | - | site_dict -- Dictionary containing all of the site data. |
316 | | - | query_notify -- Object with base type of QueryNotify(). |
317 | | - | This will be used to notify the caller about |
318 | | - | query results. |
319 | | - | proxy -- String indicating the proxy URL |
320 | | - | timeout -- Time in seconds to wait before timing out request. |
321 | | - | Default is no timeout. |
322 | | - | recursive_search -- Search for other usernames in website pages and recursively search by them. |
323 | | - | |
324 | | - | Return Value: |
325 | | - | Dictionary containing results from report. Key of dictionary is the name |
326 | | - | of the social network site, and the value is another dictionary with |
327 | | - | the following keys: |
328 | | - | url_main: URL of main site. |
329 | | - | url_user: URL of user on site (if account exists). |
330 | | - | status: QueryResult() object indicating results of test for |
331 | | - | account existence. |
332 | | - | http_status: HTTP status code of query which checked for existence on |
333 | | - | site. |
334 | | - | response_text: Text that came back from request. May be None if |
335 | | - | there was an HTTP error when checking for existence. |
336 | | - | """ |
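| | | # Illustrative shape of the return value (hypothetical site and username): |
| | | #   {'GitHub': {'username': 'alice', |
| | | #               'url_main': 'https://github.com/', |
| | | #               'url_user': 'https://github.com/alice', |
| | | #               'status': <QueryResult>, 'http_status': 200, ...}} |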
337 | | - | |
338 | | - | # Notify caller that we are starting the query. |
339 | | - | query_notify.start(username, id_type) |
340 | | - | |
341 | | - | # TODO: connector |
342 | | - | connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False) |
343 | | - | # connector = aiohttp.TCPConnector(ssl=False) |
344 | | - | connector.verify_ssl = False |
345 | | - | |
346 | | - | cookie_jar = None |
347 | | - | if cookies: |
348 | | - | cookie_jar = await import_aiohttp_cookies(cookies) |
349 | | - | |
350 | | - | session = aiohttp.ClientSession(connector=connector, trust_env=True, cookie_jar=cookie_jar) |
351 | | - | |
352 | | - | if logger.level == logging.DEBUG: |
353 | | - | future = session.get(url='https://icanhazip.com') |
354 | | - | ip, status, error, expection = await get_response(future, None, logger) |
355 | | - | if ip: |
356 | | - | logger.debug(f'My IP is: {ip.strip()}') |
357 | | - | else: |
358 | | - | logger.debug(f'IP requesting {error}: {expection}') |
359 | | - | |
360 | | - | |
361 | | - | # Results from analysis of all sites |
362 | | - | results_total = {} |
363 | | - | |
364 | | - | # First create futures for all requests. This allows for the requests to run in parallel |
365 | | - | for site_name, site in site_dict.items(): |
366 | | - | |
367 | | - | if site.type != id_type: |
368 | | - | continue |
369 | | - | |
370 | | - | if site.disabled and not forced: |
371 | | - | logger.debug(f'Site {site.name} is disabled, skipping...') |
372 | | - | continue |
373 | | - | |
374 | | - | # Results from analysis of this specific site |
375 | | - | results_site = {} |
376 | | - | |
377 | | - | # Record URL of main site and username |
378 | | - | results_site['username'] = username |
379 | | - | results_site['parsing_enabled'] = recursive_search |
380 | | - | results_site['url_main'] = site.url_main |
381 | | - | results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None |
382 | | - | |
383 | | - | headers = { |
384 | | - | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0', |
385 | | - | } |
386 | | - | |
387 | | - | headers.update(site.headers) |
388 | | - | |
389 | | - | if 'url' not in site.__dict__: |
390 | | - | logger.error('No URL for site %s', site.name) |
| | - | continue |
391 | | - | # URL of user on site (if it exists) |
392 | | - | url = site.url.format( |
393 | | - | urlMain=site.url_main, |
394 | | - | urlSubpath=site.url_subpath, |
395 | | - | username=username |
396 | | - | ) |
397 | | - | # workaround to prevent slash errors |
398 | | - | url = re.sub('(?<!:)/+', '/', url) |
399 | | - | |
400 | | - | # Don't make request if username is invalid for the site |
401 | | - | if site.regex_check and re.search(site.regex_check, username) is None: |
402 | | - | # No need to do the check at the site: this username is not allowed. |
403 | | - | results_site['status'] = QueryResult(username, |
404 | | - | site_name, |
405 | | - | url, |
406 | | - | QueryStatus.ILLEGAL) |
407 | | - | results_site["url_user"] = "" |
408 | | - | results_site['http_status'] = "" |
409 | | - | results_site['response_text'] = "" |
410 | | - | query_notify.update(results_site['status']) |
411 | | - | else: |
412 | | - | # URL of user on site (if it exists) |
413 | | - | results_site["url_user"] = url |
414 | | - | url_probe = site.url_probe |
415 | | - | if url_probe is None: |
416 | | - | # The probe URL is the normal one that people see on the web. |
417 | | - | url_probe = url |
418 | | - | else: |
419 | | - | # There is a special URL for probing existence separate |
420 | | - | # from where the user profile normally can be found. |
421 | | - | url_probe = url_probe.format( |
422 | | - | urlMain=site.url_main, |
423 | | - | urlSubpath=site.url_subpath, |
424 | | - | username=username, |
425 | | - | ) |
426 | | - | |
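| | | # Extra GET parameters are appended with '&'; this assumes url_probe already |
| | | # contains a query string for sites that define get_params. |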
427 | | - | for k, v in site.get_params.items(): |
428 | | - | url_probe += f'&{k}={v}' |
429 | | - | |
430 | | - | if site.check_type == 'status_code' and site.request_head_only: |
431 | | - | # In most cases when we are detecting by status code, |
432 | | - | # it is not necessary to get the entire body: we can |
433 | | - | # detect fine with just the HEAD response. |
434 | | - | request_method = session.head |
435 | | - | else: |
436 | | - | # Either this detect method needs the content associated |
437 | | - | # with the GET response, or this specific website will |
438 | | - | # not respond properly unless we request the whole page. |
439 | | - | request_method = session.get |
440 | | - | |
441 | | - | if site.check_type == "response_url": |
442 | | - | # Site forwards request to a different URL if username not |
443 | | - | # found. Disallow the redirect so we can capture the |
444 | | - | # http status from the original URL request. |
445 | | - | allow_redirects = False |
446 | | - | else: |
447 | | - | # Allow whatever redirect that the site wants to do. |
448 | | - | # The final result of the request will be what is available. |
449 | | - | allow_redirects = True |
450 | | - | |
451 | | - | future = request_method(url=url_probe, headers=headers, |
452 | | - | allow_redirects=allow_redirects, |
453 | | - | timeout=timeout, |
454 | | - | ) |
455 | | - | |
456 | | - | # Store future in data for access later |
457 | | - | # TODO: move to separate obj |
458 | | - | site.request_future = future |
459 | | - | |
460 | | - | # Add this site's results into final dictionary with all of the other results. |
461 | | - | results_total[site_name] = results_site |
462 | | - | |
463 | | - | # TODO: move into top-level function |
464 | | - | |
465 | | - | sem = asyncio.Semaphore(max_connections) |
466 | | - | |
467 | | - | tasks = [] |
468 | | - | for sitename, result_obj in results_total.items(): |
469 | | - | update_site_coro = update_site_dict_from_response(sitename, site_dict, result_obj, sem, logger, query_notify) |
470 | | - | future = asyncio.ensure_future(update_site_coro) |
471 | | - | tasks.append(future) |
472 | | - | |
473 | | - | if no_progressbar: |
474 | | - | await asyncio.gather(*tasks) |
475 | | - | else: |
476 | | - | for f in tqdm.asyncio.tqdm.as_completed(tasks): |
477 | | - | await f |
478 | | - | |
479 | | - | await session.close() |
480 | | - | |
481 | | - | # Notify caller that all queries are finished. |
482 | | - | query_notify.finish() |
483 | | - | |
484 | | - | return results_total |
485 | | - | |
486 | | - | |
487 | | - | def timeout_check(value): |
488 | | - | """Check Timeout Argument. |
489 | | - | |
490 | | - | Checks timeout for validity. |
491 | | - | |
492 | | - | Keyword Arguments: |
493 | | - | value -- Time in seconds to wait before timing out request. |
494 | | - | |
495 | | - | Return Value: |
496 | | - | Floating point number representing the time (in seconds) that should be |
497 | | - | used for the timeout. |
498 | | - | |
499 | | - | NOTE: Will raise an exception if the timeout is invalid. |
500 | | - | """ |
501 | | - | from argparse import ArgumentTypeError |
502 | | - | |
503 | | - | try: |
504 | | - | timeout = float(value) |
505 | | - | except ValueError: |
506 | | - | raise ArgumentTypeError(f"Timeout '{value}' must be a number.") |
507 | | - | if timeout <= 0: |
508 | | - | raise ArgumentTypeError(f"Timeout '{value}' must be greater than 0.0s.") |
509 | | - | return timeout |
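| | | # Illustrative behavior: |
| | | #   timeout_check('10')   -> 10.0 |
| | | #   timeout_check('abc')  -> ArgumentTypeError: must be a number |
| | | #   timeout_check('0')    -> ArgumentTypeError: must be greater than 0.0s |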
510 | | - | |
511 | | - | |
512 | | - | async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False): |
513 | | - | query_notify = Mock() |
514 | | - | changes = { |
515 | | - | 'disabled': False, |
516 | | - | } |
517 | | - | |
518 | | - | try: |
519 | | - | check_data = [ |
520 | | - | (site.username_claimed, QueryStatus.CLAIMED), |
521 | | - | (site.username_unclaimed, QueryStatus.AVAILABLE), |
522 | | - | ] |
523 | | - | except AttributeError: |
524 | | - | logger.error(f'Site {site.name} is missing username examples: {site.__dict__}') |
| | - | return changes |
525 | | - | |
526 | | - | logger.info(f'Checking {site.name}...') |
527 | | - | |
528 | | - | for username, status in check_data: |
529 | | - | async with semaphore: |
530 | | - | results_dict = await maigret( |
531 | | - | username, |
532 | | - | {site.name: site}, |
533 | | - | query_notify, |
534 | | - | logger, |
535 | | - | timeout=30, |
536 | | - | id_type=site.type, |
537 | | - | forced=True, |
538 | | - | no_progressbar=True, |
539 | | - | ) |
540 | | - | |
541 | | - | # don't disable entries with other id types |
542 | | - | # TODO: implement proper checking |
543 | | - | if site.name not in results_dict: |
544 | | - | logger.info(results_dict) |
545 | | - | changes['disabled'] = True |
546 | | - | continue |
547 | | - | |
548 | | - | result = results_dict[site.name]['status'] |
549 | | - | |
550 | | - | |
551 | | - | site_status = result.status |
552 | | - | |
553 | | - | if site_status != status: |
554 | | - | if site_status == QueryStatus.UNKNOWN: |
555 | | - | msgs = site.absence_strs |
556 | | - | etype = site.check_type |
557 | | - | logger.warning(f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}') |
558 | | - | # don't disable in case of available username |
559 | | - | if status == QueryStatus.CLAIMED: |
560 | | - | changes['disabled'] = True |
561 | | - | elif status == QueryStatus.CLAIMED: |
562 | | - | logger.warning(f'`{username}` not found in {site.name}, but it should be claimed') |
563 | | - | logger.info(results_dict[site.name]) |
564 | | - | changes['disabled'] = True |
565 | | - | else: |
566 | | - | logger.warning(f'`{username}` found in {site.name}, but it should be available') |
567 | | - | logger.info(results_dict[site.name]) |
568 | | - | changes['disabled'] = True |
569 | | - | |
570 | | - | logger.info(f'Site {site.name} checking is finished') |
571 | | - | |
572 | | - | if changes['disabled'] != site.disabled: |
573 | | - | site.disabled = changes['disabled'] |
574 | | - | db.update_site(site) |
575 | | - | if not silent: |
576 | | - | action = 'Disabled' if site.disabled else 'Enabled' |
577 | | - | print(f'{action} site {site.name}...') |
578 | | - | |
579 | | - | return changes |
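| | | # `changes` reports the desired toggle, e.g. {'disabled': True} when a check |
| | | # against the claimed/unclaimed username examples failed. |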
580 | | - | |
581 | | - | |
582 | | - | async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False, |
583 | | - | max_connections=10) -> bool: |
584 | | - | sem = asyncio.Semaphore(max_connections) |
585 | | - | tasks = [] |
586 | | - | all_sites = site_data |
587 | | - | |
588 | | - | def disabled_count(lst): |
589 | | - | return len(list(filter(lambda x: x.disabled, lst))) |
590 | | - | |
591 | | - | disabled_old_count = disabled_count(all_sites.values()) |
592 | | - | |
593 | | - | for _, site in all_sites.items(): |
594 | | - | check_coro = site_self_check(site, logger, sem, db, silent) |
595 | | - | future = asyncio.ensure_future(check_coro) |
596 | | - | tasks.append(future) |
597 | | - | |
598 | | - | for f in tqdm.asyncio.tqdm.as_completed(tasks): |
599 | | - | await f |
600 | | - | |
601 | | - | disabled_new_count = disabled_count(all_sites.values()) |
602 | | - | total_disabled = disabled_new_count - disabled_old_count |
603 | | - | |
604 | | - | if total_disabled >= 0: |
605 | | - | message = 'Disabled' |
606 | | - | else: |
607 | | - | message = 'Enabled' |
608 | | - | total_disabled *= -1 |
609 | | - | |
610 | | - | if not silent: |
611 | | - | print(f'{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. Run with `--info` flag to get more information') |
612 | | - | |
613 | | - | return total_disabled != 0 |
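| | | # True means the self-check toggled the disabled flag of at least one site. |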
614 | | - | |
615 | 21 | | |
616 | 22 | | async def main(): |
617 | 23 | | version_string = '\n'.join([ |
| skipped 67 lines |
685 | 91 | | action="store_true", dest="print_check_errors", default=False, |
686 | 92 | | help="Print error messages: connection, captcha, site country ban, etc." |
687 | 93 | | ) |
| 94 | + | parser.add_argument("--submit", |
| 95 | + | type=str, dest="new_site_to_submit", default=None, |
| 96 | + | help="URL of an existing profile on the new site to submit." |
| 97 | + | ) |
688 | 98 | | parser.add_argument("--no-color", |
689 | 99 | | action="store_true", dest="no_color", default=False, |
690 | 100 | | help="Don't color terminal output" |
| skipped 47 lines |
738 | 148 | | action="store_true", dest="html", default=False, |
739 | 149 | | help="Create an HTML report file (general report on all usernames)." |
740 | 150 | | ) |
741 | | - | parser.add_argument("-X","--xmind", |
| 151 | + | parser.add_argument("-X", "--xmind", |
742 | 152 | | action="store_true", |
743 | 153 | | dest="xmind", default=False, |
744 | 154 | | help="Generate an XMind 8 mindmap report (one report per username)." |
| skipped 75 lines |
820 | 230 | | |
821 | 231 | | site_data = get_top_sites_for_id(args.id_type) |
822 | 232 | | |
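| | | # Interactive flow for submitting a new site; on success the updated sites |
| | | # database is written back (args.json_file is assumed to be the local DB path). |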
| 233 | + | if args.new_site_to_submit: |
| 234 | + | is_submitted = await submit_dialog(db, args.new_site_to_submit) |
| 235 | + | if is_submitted: |
| 236 | + | db.save_to_file(args.json_file) |
| 237 | + | |
823 | 238 | | # Database self-checking |
824 | 239 | | if args.self_check: |
825 | 240 | | print('Maigret sites database self-checking...') |
| skipped 48 lines |
874 | 289 | | |
875 | 290 | | if found_unsupported_chars: |
876 | 291 | | pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars)) |
877 | | - | query_notify.warning(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"') |
| 292 | + | query_notify.warning( |
| 293 | + | f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"') |
878 | 294 | | continue |
879 | 295 | | |
880 | 296 | | sites_to_check = get_top_sites_for_id(id_type) |
| skipped 71 lines |
952 | 368 | | print('Maigret is interrupted.') |
953 | 369 | | sys.exit(1) |
954 | 370 | | |
| 371 | + | |
955 | 372 | | if __name__ == "__main__": |
956 | 373 | | run() |
| 374 | + | |