| skipped 1 lines |
2 | 2 | | """Maigret Sites Information""" |
3 | 3 | | import copy |
4 | 4 | | import json |
| 5 | + | import re |
5 | 6 | | import sys |
6 | 7 | | |
7 | 8 | | import requests |
8 | 9 | | |
9 | | - | from .utils import CaseConverter |
| 10 | + | from .utils import CaseConverter, URLMatcher |
10 | 11 | | |
11 | 12 | | |
12 | 13 | | class MaigretEngine: |
| skipped 8 lines |
21 | 22 | | |
22 | 23 | | |
23 | 24 | | class MaigretSite: |
# Field names that the `json` property leaves out of its result.
NOT_SERIALIZABLE_FIELDS = [
    "name",
    "engineData",
    "requestFuture",
    "detectedEngine",
    "engineObj",
    "stats",
    "urlRegexp",
]
| 34 | + | |
24 | 35 | | def __init__(self, name, information): |
25 | 36 | | self.name = name |
26 | 37 | | |
| skipped 30 lines |
57 | 68 | | # We do not know the popularity, so make site go to bottom of list. |
58 | 69 | | self.alexa_rank = sys.maxsize |
59 | 70 | | |
| 71 | + | self.update_detectors() |
60 | 72 | | |
def __str__(self):
    """Return the site name followed by its main URL in parentheses."""
    return "{} ({})".format(self.name, self.url_main)
63 | 75 | | |
def update_detectors(self):
    """Rebuild the compiled profile-URL pattern from the site's ``url``.

    Does nothing when the instance has no ``url`` attribute. Otherwise the
    ``{urlMain}`` and ``{urlSubpath}`` placeholders in ``url`` are replaced
    with the ``url_main`` / ``url_subpath`` attribute values, and the result
    is passed with ``regex_check`` to ``URLMatcher.make_profile_url_regexp``;
    whatever that returns is stored in ``self.url_regexp``.
    """
    if 'url' not in self.__dict__:
        return

    url = self.url
    for group in ('urlMain', 'urlSubpath'):
        placeholder = '{' + group + '}'
        # Test for the braced placeholder, not the bare group name: a URL
        # that merely contains the word would otherwise force an attribute
        # lookup (and a possible KeyError) for a substitution that is a no-op.
        if placeholder in url:
            url = url.replace(
                placeholder,
                self.__dict__[CaseConverter.camel_to_snake(group)],
            )

    self.url_regexp = URLMatcher.make_profile_url_regexp(url, self.regex_check)
| 84 | + | |
def detect_username(self, url: str) -> "str | None":
    """Extract a username from *url* using the site's profile-URL pattern.

    Returns the last captured group of the match with trailing slashes
    removed, or ``None`` when the site has no pattern or *url* does not
    match it.
    """
    # update_detectors() only assigns `url_regexp` for sites that define a
    # `url`, so the attribute may be absent; treat that as "no pattern".
    url_regexp = getattr(self, 'url_regexp', None)
    if not url_regexp:
        return None

    match = url_regexp.match(url)
    if not match:
        return None

    return match.groups()[-1].rstrip('/')
| 93 | + | |
64 | 94 | | @property |
65 | 95 | | def json(self): |
66 | 96 | | result = {} |
| skipped 3 lines |
70 | 100 | | # strip empty elements |
71 | 101 | | if v in (False, '', [], {}, None, sys.maxsize, 'username'): |
72 | 102 | | continue |
73 | | - | if field in ['name', 'engineData', 'requestFuture', 'detectedEngine', 'engineObj', 'stats']: |
| 103 | + | if field in self.NOT_SERIALIZABLE_FIELDS: |
74 | 104 | | continue |
75 | 105 | | result[field] = v |
76 | 106 | | |
| skipped 1 lines |
78 | 108 | | |
def update(self, updates: dict) -> MaigretSite:
    """Merge *updates* into the instance attributes and refresh detectors.

    Each key of *updates* is written directly into the instance
    ``__dict__``, then ``update_detectors()`` is called. Returns ``self``.
    """
    attributes = vars(self)
    attributes.update(updates)

    self.update_detectors()
    return self
83 | 114 | | |
| skipped 11 lines |
95 | 126 | | self.__dict__[field] = v |
96 | 127 | | |
97 | 128 | | self.engine_obj = engine |
| 129 | + | self.update_detectors() |
98 | 130 | | |
99 | 131 | | return self |
100 | 132 | | |
| skipped 189 lines |