🤬
  • ■ ■ ■ ■ ■ ■
    isp_data_pollution.py
    skipped 240 lines
    241 241   self.robots_timeout = self.block_timeout(self.robots_hang_handler, \
    242 242   alarm_time=short_timeout+2,errors=(self.TimeoutError,), debug=self.debug)
    243 243   self.check_chromedriver_version()
    244  - self.fake_ua = fake_ua.UserAgent()
     244 + self.get_useragents()
    245 245   self.hour_trigger = True
    246 246   self.twentyfour_hour_trigger = True
    247 247   self.domain_links = dict()
    skipped 122 lines
    370 370   chromedriver_clear()
    371 371   except Exception as e:
    372 372   if self.debug: print(f'.execute_script() exception:\n{e}')
     373 + 
     374 + def get_useragents(self):
     375 + for attempt in range(5):
     376 + try:
     377 + self.fake_ua = fake_ua.UserAgent()
     378 + except urllib.error.URLError as e:
     379 + if self.debug: print(f'.UserAgent exception #{attempt}:\n{e}')
     380 + else:
     381 + break
     382 + else:
     383 + print('Too many .UserAgent failures. Exiting.')
     384 + sys.exit(1)
    373 385   
    374 386   def get_blacklist(self,update_flag=False):
    375 387   blacklist_domains = getattr(self,'blacklist_domains',set())
    skipped 745 lines
Please wait...
Page is in error, reload to recover