🤬
  • ■ ■ ■ ■ ■
    changedetectionio/__init__.py
    skipped 631 lines
    632 632   # Only works reliably with Playwright
    633 633   visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
    634 634   
     635 + watch = datastore.data['watching'].get(uuid)
     636 + 
     637 + # Which tabs to show/hide ?
     638 + enabled_tabs = []
     639 + if watch.get('fetch_processor') == 'json_html_plaintext' or not watch.get('fetch_processor'):
     640 + enabled_tabs.append('visual-selector')
     641 + enabled_tabs.append('text-filters-and-triggers')
    635 642   
    636 643   output = render_template("edit.html",
    637 644   uuid=uuid,
    638  - watch=datastore.data['watching'][uuid],
     645 + watch=watch,
    639 646   form=form,
     647 + enabled_tabs = enabled_tabs,
    640 648   has_empty_checktime=using_default_check_time,
    641 649   has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
    642 650   using_global_webdriver_wait=default['webdriver_delay'] is None,
    skipped 814 lines
  • ■ ■ ■ ■ ■ ■
    changedetectionio/content_fetcher.py
    skipped 65 lines
    66 66   return
    67 67   
    68 68  class Fetcher():
     69 + content = None
    69 70   error = None
     71 + fetcher_description = "No description"
     72 + headers = None
     73 + raw_content = None
    70 74   status_code = None
    71  - content = None
    72  - headers = None
     75 + webdriver_js_execute_code = None
    73 76   
    74  - fetcher_description = "No description"
    75  - webdriver_js_execute_code = None
    76 77   xpath_element_js = """
    77 78   // Include the getXpath script directly, easier than fetching
    78 79   !function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
    skipped 320 lines
    399 400   raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
    400 401   
    401 402   self.content = page.content()
     403 + self.raw_content = page.content()
     404 + 
    402 405   self.status_code = response.status
    403 406   self.headers = response.all_headers()
    404 407   
    skipped 119 lines
    524 527   # @todo - dom wait loaded?
    525 528   time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
    526 529   self.content = self.driver.page_source
     530 + self.raw_content = self.driver.page_source
    527 531   self.headers = {}
    528 532   
    529 533   # Does the connection to the webdriver work? run a test connection.
    skipped 73 lines
    603 607   
    604 608   self.status_code = r.status_code
    605 609   self.content = r.text
     610 + self.raw_content = r.content
    606 611   self.headers = r.headers
    607 612   
    608 613   
    skipped 8 lines
  • ■ ■ ■ ■ ■ ■
    changedetectionio/fetch_processor/__init__.py
     1 +available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Static Image')]
     2 + 
    1 3  class fetch_processor():
    2 4   contents = b''
    3 5   screenshot = None
    skipped 39 lines
  • ■ ■ ■ ■ ■ ■
    changedetectionio/fetch_processor/image.py
     1 +import hashlib
     2 +import imagehash
     3 +from PIL import Image
     4 +import io
     5 +import logging
     6 +import os
     7 +import re
     8 +import time
     9 +import urllib3
     10 + 
     11 +# fetch processor for requesting and comparing a single image
     12 +# can use both requests and playwright/selenium
     13 + 
     14 +# - imagehash for change detection (or https://github.com/dgtlmoon/changedetection.io/pull/419/files#diff-7d3854710a6c0faead783f75850100a4c4b69409309200d3a83692dc9783bf6eR17 ?)
     15 +# - skimage.metrics import structural_similarity for viewing the diff
     16 + 
     17 + 
     18 +from changedetectionio import content_fetcher, html_tools
     19 + 
     20 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
     21 + 
     22 +from . import fetch_processor
     23 + 
     24 + 
     25 +# Some common stuff here that can be moved to a base class
     26 +# (set_proxy_from_list)
     27 +class perform_site_check(fetch_processor):
     28 + xpath_data = None
     29 + 
     30 + def run(self, uuid):
     31 + changed_detected = False
     32 + screenshot = False # as bytes
     33 + stripped_text_from_html = ""
     34 + 
     35 + watch = self.datastore.data['watching'].get(uuid)
     36 + 
     37 + # Protect against file:// access
     38 + if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
     39 + raise Exception(
     40 + "file:// type access is denied for security reasons."
     41 + )
     42 + 
     43 + # Unset any existing notification error
     44 + update_obj = {'last_notification_error': False, 'last_error': False}
     45 + 
     46 + extra_headers = self.datastore.data['watching'][uuid].get('headers')
     47 + 
     48 + # Tweak the base config with the per-watch ones
     49 + request_headers = self.datastore.data['settings']['headers'].copy()
     50 + request_headers.update(extra_headers)
     51 + 
     52 + # https://github.com/psf/requests/issues/4525
     53 + # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
     54 + # do this by accident.
     55 + if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
     56 + request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
     57 + 
     58 + timeout = self.datastore.data['settings']['requests']['timeout']
     59 + url = watch.get('url')
     60 + request_body = self.datastore.data['watching'][uuid].get('body')
     61 + request_method = self.datastore.data['watching'][uuid].get('method')
     62 + ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
     63 + 
     64 + prefer_backend = watch['fetch_backend']
     65 + if hasattr(content_fetcher, prefer_backend):
     66 + klass = getattr(content_fetcher, prefer_backend)
     67 + else:
     68 + # If the klass doesnt exist, just use a default
     69 + klass = getattr(content_fetcher, "html_requests")
     70 + 
     71 + proxy_args = self.set_proxy_from_list(watch)
     72 + fetcher = klass(proxy_override=proxy_args)
     73 + 
     74 + fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes)
     75 + fetcher.quit()
     76 + 
     77 + # if not image/foobar in mimetype
     78 + # raise content_fecther.NotAnImage(mimetype) ?
     79 + # or better to try load with PIL and catch exception?
     80 + 
     81 + update_obj["last_check_status"] = fetcher.get_last_status_code()
     82 + 
     83 + image = Image.open(io.BytesIO(fetcher.raw_content))
     84 + 
     85 + # @todo different choice?
     86 + # https://github.com/JohannesBuchner/imagehash#references
     87 + fetched_hash = str(imagehash.average_hash(image))
     88 + 
     89 + # The main thing that all this at the moment comes down to :)
     90 + if watch['previous_md5'] != fetched_hash:
     91 + changed_detected = True
     92 + 
     93 + # Always record the new checksum
     94 + update_obj["previous_md5"] = fetched_hash
     95 + 
     96 + # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
     97 + if not watch.get('previous_md5'):
     98 + watch['previous_md5'] = fetched_hash
     99 + 
     100 + #self.contents = fetcher.screenshot
     101 + 
     102 + return changed_detected, update_obj
     103 + 
  • ■ ■ ■ ■ ■
    changedetectionio/fetch_processor/rendered_webpage.py
     1 + 
  • ■ ■ ■ ■ ■ ■
    changedetectionio/forms.py
    skipped 305 lines
    306 306   
    307 307   
    308 308  class quickWatchForm(Form):
     309 + from . import fetch_processor
     310 + 
    309 311   url = fields.URLField('URL', validators=[validateURL()])
    310 312   tag = StringField('Group tag', [validators.Optional()])
     313 + fetch_processor = RadioField(u'Compare as', choices=fetch_processor.available_fetchers, default=fetch_processor.available_fetchers[0][0])
    311 314   watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
    312 315   edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
    313 316   
    skipped 97 lines
  • ■ ■ ■ ■ ■
    changedetectionio/model/Watch.py
    skipped 26 lines
    27 27   'extract_text': [], # Extract text by regex after filters
    28 28   'extract_title_as_title': False,
    29 29   'fetch_backend': None,
     30 + 'fetch_processor': None, # default None, json_html_plaintext, image
    30 31   'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
    31 32   'headers': {}, # Extra headers to send
    32 33   'ignore_text': [], # List of text to ignore when calculating the comparison checksum
    skipped 220 lines
  • ■ ■ ■ ■ ■
    changedetectionio/templates/edit.html
    skipped 24 lines
    25 25   <ul>
    26 26   <li class="tab" id=""><a href="#general">General</a></li>
    27 27   <li class="tab"><a href="#request">Request</a></li>
     28 + {% if 'visual-selector' in enabled_tabs %}
    28 29   <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
     30 + {%endif%}
     31 + {% if 'text-filters-and-triggers' in enabled_tabs %}
    29 32   <li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
     33 + {%endif%}
     34 + 
    30 35   <li class="tab"><a href="#notifications">Notifications</a></li>
    31 36   </ul>
    32 37   </div>
    skipped 301 lines
  • ■ ■ ■ ■ ■ ■
    changedetectionio/templates/watch-overview.html
    skipped 14 lines
    15 15   <div>
    16 16   {{ render_simple_field(form.url, placeholder="https://...", required=true) }}
    17 17   {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
     18 + <br/>
     19 + {{ render_field(form.fetch_processor) }}
    18 20   </div>
    19 21   <div>
    20 22   {{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
    skipped 130 lines
  • ■ ■ ■ ■ ■
    changedetectionio/update_worker.py
    skipped 119 lines
    120 120   os.unlink(full_path)
    121 121   
    122 122   def run(self):
    123  - from .fetch_processor import json_html_plaintext
     123 + 
    124 124   
     125 + from .fetch_processor import json_html_plaintext as processor_json_html_plaintext, image as processor_image
    125 126   
    126 127   
    127 128   while not self.app.config.exit.is_set():
    skipped 7 lines
    135 136   self.current_uuid = uuid
    136 137   
    137 138   if uuid in list(self.datastore.data['watching'].keys()):
    138  - update_handler = None # Interface object
     139 + update_handler = None # Interface object
    139 140   changed_detected = False
    140  - update_obj= {}
     141 + update_obj = {}
    141 142   process_changedetection_results = True
    142  - print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
     143 + watch = self.datastore.data['watching'].get(uuid)
     144 + print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, watch.get('url')))
    143 145   now = time.time()
    144 146   
    145 147   try:
    146  - update_handler = json_html_plaintext.perform_site_check(datastore=self.datastore)
     148 + update_handler = None
     149 + 
     150 + if watch.get('fetch_processor') == 'image':
     151 + update_handler = processor_image.perform_site_check(datastore=self.datastore)
     152 + else:
     153 + # Anything else for now will be `json_html_plaintext`
     154 + update_handler = processor_json_html_plaintext.perform_site_check(datastore=self.datastore)
     155 + 
    147 156   changed_detected, update_obj = update_handler.run(uuid)
     157 + 
    148 158   # Re #342
    149 159   # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
    150 160   # We then convert/.decode('utf-8') for the notification etc
    skipped 152 lines
  • ■ ■ ■ ■ ■ ■
    requirements.txt
    skipped 42 lines
    43 43   
    44 44  # playwright is installed at Dockerfile build time because it's not available on all platforms
    45 45   
     46 + 
     47 +imagehash ~= 4.3.0
     48 +pillow
     49 + 
Please wait...
Page is in error, reload to recover