| skipped 18 lines |
19 | 19 | | # You should have received a copy of the GNU General Public License |
20 | 20 | | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
21 | 21 | | |
| 22 | + | __version__ = '1.0' |
22 | 23 | | |
23 | 24 | | import argparse as ap, datetime as dt, numpy as np, numpy.random as npr, os, psutil, random, requests, signal, sys, tarfile, time |
24 | 25 | | import urllib.request, urllib.robotparser as robotparser, urllib.parse as uprs |
| skipped 144 lines |
169 | 170 | | alarm_time=self.timeout+2,errors=(self.TimeoutError,), debug=self.debug) |
170 | 171 | | self.phantomjs_short_timeout = self.block_timeout(self.phantomjs_hang_handler, \ |
171 | 172 | | alarm_time=short_timeout+1,errors=(self.TimeoutError,Exception), debug=self.debug) |
| 173 | + | self.phantomjs_quit_timeout = self.block_timeout(self.phantomjs_quit_hang_handler, \ |
| 174 | + | alarm_time=short_timeout+1,errors=(self.TimeoutError,Exception), debug=self.debug) |
172 | 175 | | self.robots_timeout = self.block_timeout(self.robots_hang_handler, \ |
173 | 176 | | alarm_time=short_timeout+1,errors=(self.TimeoutError,), debug=self.debug) |
174 | 177 | | self.fake = Factory.create() |
| skipped 4 lines |
179 | 182 | | self.data_usage = 0 |
180 | 183 | | self.get_blacklist() |
181 | 184 | | self.get_random_words() |
| 185 | + | print('This is ISP Data Pollution 🐙💨, Version {}'.format(__version__)) |
182 | 186 | | self.pollute_forever() |
183 | 187 | | |
184 | 188 | | def parseArgs(self): |
185 | 189 | | parser = ap.ArgumentParser() |
186 | 190 | | parser.add_argument('-bw', '--gb_per_month', help="GB per month", type=int, default=gb_per_month) |
187 | | - | parser.add_argument('-mm', '--maxmemory', help="Maximum memory of phantomjs (MB); 0=>restart every link", type=int, default=phantomjs_rss_limit_mb) |
| 191 | + | parser.add_argument('-mm', '--maxmemory', |
| 192 | + | help="Maximum memory of phantomjs (MB); 0=>restart every link", |
| 193 | + | type=int, default=0) |
| 194 | + | # parser.add_argument('-P', '--phantomjs-binary-path', help="Path to phantomjs binary", type=int, default=phantomjs_rss_limit_mb) |
188 | 195 | | parser.add_argument('-g', '--debug', help="Debug flag", action='store_true') |
189 | 196 | | args = parser.parse_args() |
190 | 197 | | for k in args.__dict__: setattr(self,k,getattr(args,k)) |
| skipped 15 lines |
206 | 213 | | # http://stackoverflow.com/questions/23390974/phantomjs-keeping-cache |
207 | 214 | | dcap = dict(DesiredCapabilities.PHANTOMJS) |
208 | 215 | | # dcap['browserName'] = 'Chrome' |
| 216 | + | # if hasattr(self,'phantomjs_binary_path'): dcap['phantomjs.binary.path'] = ( self.phantomjs_binary_path ) |
209 | 217 | | dcap['phantomjs.page.settings.userAgent'] = ( self.user_agent ) |
210 | 218 | | dcap['phantomjs.page.settings.loadImages'] = ( 'false' ) |
211 | 219 | | dcap['phantomjs.page.settings.clearMemoryCaches'] = ( 'true' ) |
212 | 220 | | dcap['phantomjs.page.settings.resourceTimeout'] = ( max(2000,int(self.timeout * 1000)) ) |
213 | 221 | | dcap['acceptSslCerts'] = ( True ) |
214 | | - | dcap['applicationCacheEnabled'] = ( False ) |
| 222 | + | dcap['applicationCacheEnabled'] = ( True ) |
215 | 223 | | dcap['handlesAlerts'] = ( False ) |
216 | 224 | | dcap['phantomjs.page.customHeaders'] = ( { 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate, sdch' } ) |
217 | 225 | | driver = webdriver.PhantomJS(desired_capabilities=dcap,service_args=['--disk-cache=false','--ignore-ssl-errors=false','--ssl-protocol=TLSv1.2']) |
| 226 | + | # if hasattr(self,'phantomjs_binary_path'): driver.capabilities.setdefault("phantomjs.binary.path", self.phantomjs_binary_path) |
218 | 227 | | driver.set_window_size(1296,1018) # Tor browser size on Linux |
219 | 228 | | driver.implicitly_wait(self.timeout+10) |
220 | 229 | | driver.set_page_load_timeout(self.timeout+10) |
221 | 230 | | self.session = driver |
222 | 231 | | |
223 | | - | def quit_session(self,hard_quit=False,pid=None): |
| 232 | + | def quit_session(self,hard_quit=False,pid=None,phantomjs_short_timeout_decorator=None): |
224 | 233 | | """ |
225 | 234 | | close, kill -9, quit, del |
226 | 235 | | :param hard_quit: |
| skipped 1 lines |
228 | 237 | | :return: |
229 | 238 | | """ |
230 | 239 | | # http://stackoverflow.com/questions/25110624/how-to-properly-stop-phantomjs-execution |
| 240 | + | if phantomjs_short_timeout_decorator is None: |
| 241 | + | phantomjs_short_timeout_decorator = self.phantomjs_short_timeout |
231 | 242 | | if hasattr(self,'session'): |
232 | 243 | | if not hard_quit: |
233 | | - | @self.phantomjs_short_timeout |
| 244 | + | @phantomjs_short_timeout_decorator |
234 | 245 | | def phantomjs_close(): self.session.close() |
235 | 246 | | phantomjs_close() |
236 | 247 | | try: |
237 | | - | @self.phantomjs_short_timeout |
| 248 | + | @phantomjs_short_timeout_decorator |
238 | 249 | | def phantomjs_send_signal(): self.session.service.process.send_signal(signal.SIGTERM) |
239 | 250 | | phantomjs_send_signal() |
240 | 251 | | except Exception as e: |
| skipped 7 lines |
248 | 259 | | except Exception as e: |
249 | 260 | | if self.debug: print('.kill() exception:\n{}'.format(e)) |
250 | 261 | | try: |
251 | | - | @self.phantomjs_short_timeout |
252 | | - | def phantomjs_quit(): |
253 | | - | self.session.quit() |
254 | | - | del self.session # only delete session if quit is successful |
| 262 | + | @phantomjs_short_timeout_decorator |
| 263 | + | def phantomjs_quit(): self.session.quit() |
255 | 264 | | phantomjs_quit() |
256 | 265 | | except Exception as e: |
257 | 266 | | if self.debug: print('.quit() exception:\n{}'.format(e)) |
| 267 | + | del self.session |
258 | 268 | | |
259 | 269 | | def clear_session(self): |
260 | 270 | | # https://sqa.stackexchange.com/questions/10466/how-to-clear-localstorage-using-selenium-and-webdriver |
| skipped 68 lines |
329 | 339 | | # if self.debug: print('There are {:d} words.'.format(len(self.words))) |
330 | 340 | | |
331 | 341 | | def pollute_forever(self): |
332 | | - | if self.verbose: print("""Display formats: |
| 342 | + | if self.verbose: print("""Display format: |
333 | 343 | | Downloading: website.com; NNNNN links [in library], H(domain)= B bits [entropy] |
334 | 344 | | Downloaded: website.com: +LLL/NNNNN links [added], H(domain)= B bits [entropy] |
335 | 345 | | """) |
| skipped 106 lines |
442 | 452 | | if self.hour_trigger: |
443 | 453 | | if hasattr(self,'session'): |
444 | 454 | | self.set_user_agent() |
445 | | - | try: |
446 | | - | @self.phantomjs_short_timeout |
447 | | - | def phantomjs_delete_all_cookies(): self.session.delete_all_cookies() |
448 | | - | phantomjs_delete_all_cookies() |
449 | | - | except Exception as e: |
450 | | - | if self.debug: print('.delete_all_cookies() exception:\n{}'.format(e)) |
| 455 | + | if True: |
| 456 | + | self.quit_session() |
| 457 | + | self.open_session() |
| 458 | + | else: |
| 459 | + | try: |
| 460 | + | @self.phantomjs_short_timeout |
| 461 | + | def phantomjs_delete_all_cookies(): self.session.delete_all_cookies() |
| 462 | + | phantomjs_delete_all_cookies() |
| 463 | + | except Exception as e: |
| 464 | + | if self.debug: print('.delete_all_cookies() exception:\n{}'.format(e)) |
451 | 465 | | self.seed_links() |
452 | 466 | | else: self.open_session() |
453 | 467 | | self.hour_trigger = False |
| skipped 300 lines |
754 | 768 | | # https://github.com/detro/ghostdriver/issues/334 |
755 | 769 | | # http://stackoverflow.com/questions/492519/timeout-on-a-function-call |
756 | 770 | | if self.debug: print('Looks like phantomjs has hung.') |
757 | | - | try: |
758 | | - | self.quit_session(hard_quit=True) |
759 | | - | self.open_session() |
760 | | - | except Exception as e: |
761 | | - | if self.debug: print('.quit_session() exception:\n{}'.format(e)) |
762 | | - | raise self.TimeoutError('Unable to quit the session as well.') |
763 | | - | raise self.TimeoutError('phantomjs is taking too long') |
| 771 | + | @self.phantomjs_quit_timeout |
| 772 | + | def phantomjs_quit_session(): |
| 773 | + | self.quit_session(phantomjs_short_timeout_decorator=self.phantomjs_quit_timeout) |
| 774 | + | phantomjs_session_quit() |
| 775 | + | self.open_session() |
| 776 | + | |
| 777 | + | def phantomjs_quit_hang_handler(self, signum, frame): # handler passed to self.block_timeout when building self.phantomjs_quit_timeout; signum and frame are accepted but unused |
| 778 | + | raise self.TimeoutError('phantomjs .quit method is taking too long') # always raises; TimeoutError is one of the error types phantomjs_quit_timeout was configured with |
764 | 779 | | |
765 | 780 | | def robots_hang_handler(self, signum, frame): |
766 | 781 | | if self.debug: print('Looks like robotparser has hung.') |
| skipped 48 lines |