🤬
  • ■ ■ ■ ■ ■ ■
    isp_data_pollution.py
    skipped 18 lines
    19 19  # You should have received a copy of the GNU General Public License
    20 20  # along with this program. If not, see <http://www.gnu.org/licenses/>.
    21 21   
     22 +__version__ = '1.0'
    22 23   
    23 24  import argparse as ap, datetime as dt, numpy as np, numpy.random as npr, os, psutil, random, requests, signal, sys, tarfile, time
    24 25  import urllib.request, urllib.robotparser as robotparser, urllib.parse as uprs
    skipped 144 lines
    169 170   alarm_time=self.timeout+2,errors=(self.TimeoutError,), debug=self.debug)
    170 171   self.phantomjs_short_timeout = self.block_timeout(self.phantomjs_hang_handler, \
    171 172   alarm_time=short_timeout+1,errors=(self.TimeoutError,Exception), debug=self.debug)
     173 + self.phantomjs_quit_timeout = self.block_timeout(self.phantomjs_quit_hang_handler, \
     174 + alarm_time=short_timeout+1,errors=(self.TimeoutError,Exception), debug=self.debug)
    172 175   self.robots_timeout = self.block_timeout(self.robots_hang_handler, \
    173 176   alarm_time=short_timeout+1,errors=(self.TimeoutError,), debug=self.debug)
    174 177   self.fake = Factory.create()
    skipped 4 lines
    179 182   self.data_usage = 0
    180 183   self.get_blacklist()
    181 184   self.get_random_words()
     185 + print('This is ISP Data Pollution 🐙💨, Version {}'.format(__version__))
    182 186   self.pollute_forever()
    183 187   
    184 188   def parseArgs(self):
    185 189   parser = ap.ArgumentParser()
    186 190   parser.add_argument('-bw', '--gb_per_month', help="GB per month", type=int, default=gb_per_month)
    187  - parser.add_argument('-mm', '--maxmemory', help="Maximum memory of phantomjs (MB); 0=>restart every link", type=int, default=phantomjs_rss_limit_mb)
     191 + parser.add_argument('-mm', '--maxmemory',
     192 + help="Maximum memory of phantomjs (MB); 0=>restart every link",
     193 + type=int, default=0)
     194 + # parser.add_argument('-P', '--phantomjs-binary-path', help="Path to phantomjs binary", type=int, default=phantomjs_rss_limit_mb)
    188 195   parser.add_argument('-g', '--debug', help="Debug flag", action='store_true')
    189 196   args = parser.parse_args()
    190 197   for k in args.__dict__: setattr(self,k,getattr(args,k))
    skipped 15 lines
    206 213   # http://stackoverflow.com/questions/23390974/phantomjs-keeping-cache
    207 214   dcap = dict(DesiredCapabilities.PHANTOMJS)
    208 215   # dcap['browserName'] = 'Chrome'
     216 + # if hasattr(self,'phantomjs_binary_path'): dcap['phantomjs.binary.path'] = ( self.phantomjs_binary_path )
    209 217   dcap['phantomjs.page.settings.userAgent'] = ( self.user_agent )
    210 218   dcap['phantomjs.page.settings.loadImages'] = ( 'false' )
    211 219   dcap['phantomjs.page.settings.clearMemoryCaches'] = ( 'true' )
    212 220   dcap['phantomjs.page.settings.resourceTimeout'] = ( max(2000,int(self.timeout * 1000)) )
    213 221   dcap['acceptSslCerts'] = ( True )
    214  - dcap['applicationCacheEnabled'] = ( False )
     222 + dcap['applicationCacheEnabled'] = ( True )
    215 223   dcap['handlesAlerts'] = ( False )
    216 224   dcap['phantomjs.page.customHeaders'] = ( { 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate, sdch' } )
    217 225   driver = webdriver.PhantomJS(desired_capabilities=dcap,service_args=['--disk-cache=false','--ignore-ssl-errors=false','--ssl-protocol=TLSv1.2'])
     226 + # if hasattr(self,'phantomjs_binary_path'): driver.capabilities.setdefault("phantomjs.binary.path", self.phantomjs_binary_path)
    218 227   driver.set_window_size(1296,1018) # Tor browser size on Linux
    219 228   driver.implicitly_wait(self.timeout+10)
    220 229   driver.set_page_load_timeout(self.timeout+10)
    221 230   self.session = driver
    222 231   
    223  - def quit_session(self,hard_quit=False,pid=None):
     232 + def quit_session(self,hard_quit=False,pid=None,phantomjs_short_timeout_decorator=None):
    224 233   """
    225 234   close, kill -9, quit, del
    226 235   :param hard_quit:
    skipped 1 lines
    228 237   :return:
    229 238   """
    230 239   # http://stackoverflow.com/questions/25110624/how-to-properly-stop-phantomjs-execution
     240 + if phantomjs_short_timeout_decorator is None:
     241 + phantomjs_short_timeout_decorator = self.phantomjs_short_timeout
    231 242   if hasattr(self,'session'):
    232 243   if not hard_quit:
    233  - @self.phantomjs_short_timeout
     244 + @phantomjs_short_timeout_decorator
    234 245   def phantomjs_close(): self.session.close()
    235 246   phantomjs_close()
    236 247   try:
    237  - @self.phantomjs_short_timeout
     248 + @phantomjs_short_timeout_decorator
    238 249   def phantomjs_send_signal(): self.session.service.process.send_signal(signal.SIGTERM)
    239 250   phantomjs_send_signal()
    240 251   except Exception as e:
    skipped 7 lines
    248 259   except Exception as e:
    249 260   if self.debug: print('.kill() exception:\n{}'.format(e))
    250 261   try:
    251  - @self.phantomjs_short_timeout
    252  - def phantomjs_quit():
    253  - self.session.quit()
    254  - del self.session # only delete session if quit is successful
     262 + @phantomjs_short_timeout_decorator
     263 + def phantomjs_quit(): self.session.quit()
    255 264   phantomjs_quit()
    256 265   except Exception as e:
    257 266   if self.debug: print('.quit() exception:\n{}'.format(e))
     267 + del self.session
    258 268   
    259 269   def clear_session(self):
    260 270   # https://sqa.stackexchange.com/questions/10466/how-to-clear-localstorage-using-selenium-and-webdriver
    skipped 68 lines
    329 339   # if self.debug: print('There are {:d} words.'.format(len(self.words)))
    330 340   
    331 341   def pollute_forever(self):
    332  - if self.verbose: print("""Display formats:
     342 + if self.verbose: print("""Display format:
    333 343  Downloading: website.com; NNNNN links [in library], H(domain)= B bits [entropy]
    334 344  Downloaded: website.com: +LLL/NNNNN links [added], H(domain)= B bits [entropy]
    335 345  """)
    skipped 106 lines
    442 452   if self.hour_trigger:
    443 453   if hasattr(self,'session'):
    444 454   self.set_user_agent()
    445  - try:
    446  - @self.phantomjs_short_timeout
    447  - def phantomjs_delete_all_cookies(): self.session.delete_all_cookies()
    448  - phantomjs_delete_all_cookies()
    449  - except Exception as e:
    450  - if self.debug: print('.delete_all_cookies() exception:\n{}'.format(e))
     455 + if True:
     456 + self.quit_session()
     457 + self.open_session()
     458 + else:
     459 + try:
     460 + @self.phantomjs_short_timeout
     461 + def phantomjs_delete_all_cookies(): self.session.delete_all_cookies()
     462 + phantomjs_delete_all_cookies()
     463 + except Exception as e:
     464 + if self.debug: print('.delete_all_cookies() exception:\n{}'.format(e))
    451 465   self.seed_links()
    452 466   else: self.open_session()
    453 467   self.hour_trigger = False
    skipped 300 lines
    754 768   # https://github.com/detro/ghostdriver/issues/334
    755 769   # http://stackoverflow.com/questions/492519/timeout-on-a-function-call
    756 770   if self.debug: print('Looks like phantomjs has hung.')
    757  - try:
    758  - self.quit_session(hard_quit=True)
    759  - self.open_session()
    760  - except Exception as e:
    761  - if self.debug: print('.quit_session() exception:\n{}'.format(e))
    762  - raise self.TimeoutError('Unable to quit the session as well.')
    763  - raise self.TimeoutError('phantomjs is taking too long')
     771 + @self.phantomjs_quit_timeout
     772 + def phantomjs_quit_session():
     773 + self.quit_session(phantomjs_short_timeout_decorator=self.phantomjs_quit_timeout)
     774 + phantomjs_session_quit()
     775 + self.open_session()
     776 + 
     777 + def phantomjs_quit_hang_handler(self, signum, frame):  # handler handed to self.block_timeout to build self.phantomjs_quit_timeout; signum and frame are not used here
     778 + raise self.TimeoutError('phantomjs .quit method is taking too long')  # always raises; presumably this is how a stalled quit is reported to the caller — confirm against block_timeout
    764 779   
    765 780   def robots_hang_handler(self, signum, frame):
    766 781   if self.debug: print('Looks like robotparser has hung.')
    skipped 48 lines
Please wait...
Page is in error, reload to recover