STRLCPY/changedetection.io

WIP
dgtlmoon committed 2 years ago

c5b0c198

1 parent c00459e1

■ ■ ■ ■ ■ ■

changedetectionio/__init__.py

		skipped 823 lines
824	824		return output
825	825
826	826
	827	+	@app.route("/preview/image/<string:uuid>", methods=['GET'])
	828	+	@login_required
	829	+	def preview_image_history_page(uuid):
	830	+
	831	+	# More for testing, possible to return the first/only
	832	+	if uuid == 'first':
	833	+	uuid = list(datastore.data['watching'].keys()).pop()
	834	+
	835	+	extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
	836	+	try:
	837	+	watch = datastore.data['watching'][uuid]
	838	+	except KeyError:
	839	+	flash("No history found for the specified link, bad link?", "error")
	840	+	return redirect(url_for('index'))
	841	+
	842	+	history = watch.history
	843	+	dates = list(history.keys())
	844	+
	845	+	if len(dates) < 1:
	846	+	flash("Not enough saved change detection snapshots to produce a report.", "error")
	847	+	return redirect(url_for('index'))
	848	+
	849	+	output = render_template("preview-image.html",
	850	+	watch=watch,
	851	+	extra_stylesheets=extra_stylesheets,
	852	+	uuid=uuid,
	853	+	current_diff_url=watch['url'],
	854	+	newest_history_key = watch.newest_history_key,
	855	+	extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
	856	+	left_sticky=True,
	857	+	last_error=watch['last_error'],
	858	+	last_error_text=watch.get_error_text(),
	859	+	last_error_screenshot=watch.get_error_snapshot()
	860	+	)
	861	+	return output
	862	+
827	863		@app.route("/diff/<string:uuid>", methods=['GET'])
828	864		@login_required
829	865		def diff_history_page(uuid):
		skipped 736 lines

■ ■ ■ ■ ■ ■

changedetectionio/fetch_processor/__init__.py

1		-	available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Static Image')]
	1	+	available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Static Image'), ('rendered_webpage', 'Screenshot of page or element')]
2	2
3	3		class fetch_processor():
4	4		contents = b''
		skipped 39 lines

■ ■ ■ ■ ■ ■

changedetectionio/fetch_processor/rendered_webpage.py

	1	+	import hashlib
	2	+	import imagehash
	3	+	from PIL import Image
	4	+	import io
	5	+	import logging
	6	+	import os
	7	+	import re
	8	+	import time
	9	+	import urllib3
	10	+
	11	+	# fetch processor for requesting and comparing a single image
	12	+	# can use both requests and playwright/selenium
	13	+
	14	+	# - imagehash for change detection (or https://github.com/dgtlmoon/changedetection.io/pull/419/files#diff-7d3854710a6c0faead783f75850100a4c4b69409309200d3a83692dc9783bf6eR17 ?)
	15	+	# - skimage.metrics import structural_similarity for viewing the diff
	16	+
	17	+
	18	+	from changedetectionio import content_fetcher, html_tools
	19	+
	20	+	urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
	21	+
	22	+	from . import fetch_processor
	23	+
	24	+
	25	+	# Some common stuff here that can be moved to a base class
	26	+	# (set_proxy_from_list)
	27	+	class perform_site_check(fetch_processor):
	28	+	xpath_data = None
	29	+
	30	+	def run(self, uuid):
	31	+	changed_detected = False
	32	+
	33	+	watch = self.datastore.data['watching'].get(uuid)
	34	+
	35	+	# Protect against file:// access
	36	+	if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
	37	+	raise Exception(
	38	+	"file:// type access is denied for security reasons."
	39	+	)
	40	+
	41	+	if watch.get('fetch_backend') != 'html_webdriver':
	42	+	raise Exception(
	43	+	"Requires a Chrome compatible fetcher enabled."
	44	+	)
	45	+
	46	+	# Unset any existing notification error
	47	+	update_obj = {'last_notification_error': False, 'last_error': False}
	48	+
	49	+	extra_headers = self.datastore.data['watching'][uuid].get('headers')
	50	+
	51	+	# Tweak the base config with the per-watch ones
	52	+	request_headers = self.datastore.data['settings']['headers'].copy()
	53	+	request_headers.update(extra_headers)
	54	+
	55	+	# https://github.com/psf/requests/issues/4525
	56	+	# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
	57	+	# do this by accident.
	58	+	if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
	59	+	request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
	60	+
	61	+	timeout = self.datastore.data['settings']['requests']['timeout']
	62	+	url = watch.get('url')
	63	+	request_body = self.datastore.data['watching'][uuid].get('body')
	64	+	request_method = self.datastore.data['watching'][uuid].get('method')
	65	+	ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
	66	+
	67	+	prefer_backend = watch['fetch_backend']
	68	+	if hasattr(content_fetcher, prefer_backend):
	69	+	klass = getattr(content_fetcher, prefer_backend)
	70	+	else:
	71	+	# If the klass doesnt exist, just use a default
	72	+	klass = getattr(content_fetcher, "html_requests")
	73	+
	74	+	proxy_args = self.set_proxy_from_list(watch)
	75	+	fetcher = klass(proxy_override=proxy_args)
	76	+
	77	+	fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes)
	78	+	fetcher.quit()
	79	+
	80	+	# if not image/foobar in mimetype
	81	+	# raise content_fecther.NotAnImage(mimetype) ?
	82	+	# or better to try load with PIL and catch exception?
	83	+
	84	+	update_obj["last_check_status"] = fetcher.get_last_status_code()
	85	+
	86	+	self.contents = fetcher.screenshot
	87	+
	88	+	image = Image.open(io.BytesIO(fetcher.screenshot))
	89	+
	90	+	# @todo different choice?
	91	+	# https://github.com/JohannesBuchner/imagehash#references
	92	+	fetched_hash = str(imagehash.average_hash(image))
	93	+
	94	+	# The main thing that all this at the moment comes down to :)
	95	+	if watch['previous_md5'] != fetched_hash:
	96	+	changed_detected = True
	97	+
	98	+	# Always record the new checksum
	99	+	update_obj["previous_md5"] = fetched_hash
	100	+
	101	+	# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
	102	+	if not watch.get('previous_md5'):
	103	+	watch['previous_md5'] = fetched_hash
	104	+
	105	+	return changed_detected, update_obj
1	106

■ ■ ■ ■ ■ ■

changedetectionio/model/Watch.py

		skipped 26 lines
27	27		'extract_text': [], # Extract text by regex after filters
28	28		'extract_title_as_title': False,
29	29		'fetch_backend': None,
30		-	'fetch_processor': 'json_html_plaintext', # json_html_plaintext, image
	30	+	'fetch_processor': 'json_html_plaintext', # json_html_plaintext, image, rendered_webpage
31	31		'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
32	32		'headers': {}, # Extra headers to send
33	33		'ignore_text': [], # List of text to ignore when calculating the comparison checksum
		skipped 229 lines

■ ■ ■ ■ ■ ■

changedetectionio/templates/preview-image.html

		skipped 4 lines
5	5		</div>
6	6
7	7		<div id="diff-ui">
8		-	<img style="max-width: 100%" src="{{ url_for('render_single_image', uuid=uuid, date=current_previous_version) }}" />
	8	+	<img style="max-width: 100%" src="{{ url_for('render_single_image', uuid=uuid, history_timestamp=newest_history_key) }}" />
9	9		</div>
10	10
11	11		{% endblock %}

■ ■ ■ ■ ■ ■

changedetectionio/templates/watch-overview.html

		skipped 115 lines
116	116		class="recheck pure-button button-small pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
117	117		<a href="{{ url_for('edit_page', uuid=watch.uuid)}}" class="pure-button button-small pure-button-primary">Edit</a>
118	118		{% if watch.history_n >= 2 %}
119		-	{% if watch.fetch_processor == "image" %}
	119	+	{% if watch.fetch_processor == "image" or watch.fetch_processor == "rendered_webpage" %}
120	120		<a href="{{ url_for('diff_image_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
121	121		{% else %}
122	122		<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
123		-	{% endif %}
	123	+	{% endif %}
124	124
125	125		{% else %}
126	126		{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
127		-	<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
	127	+
	128	+	{% if watch.fetch_processor == "image" or watch.fetch_processor == "rendered_webpage" %}
	129	+	<a href="{{ url_for('preview_image_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Preview</a>
	130	+	{% else %}
	131	+	<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
	132	+	{% endif %}
128	133		{% endif %}
129	134		{% endif %}
130	135		</td>
		skipped 27 lines

■ ■ ■ ■ ■ ■

changedetectionio/update_worker.py

		skipped 119 lines
120	120		os.unlink(full_path)
121	121
122	122		def run(self):
123		-
124		-
125		-	from .fetch_processor import json_html_plaintext as processor_json_html_plaintext, image as processor_image
126		-
127		-
128	123		while not self.app.config.exit.is_set():
129	124
130	125		try:
		skipped 17 lines
148	143		update_handler = None
149	144
150	145		if watch.get('fetch_processor') == 'image':
	146	+	from .fetch_processor import image as processor_image
151	147		update_handler = processor_image.perform_site_check(datastore=self.datastore)
	148	+	elif watch.get('fetch_processor') == 'rendered_webpage':
	149	+	from .fetch_processor import image as processor_rendered_webpage
	150	+	update_handler = processor_rendered_webpage.perform_site_check(datastore=self.datastore)
152	151		else:
153	152		# Anything else for now will be `json_html_plaintext`
	153	+	from .fetch_processor import json_html_plaintext as processor_json_html_plaintext
154	154		update_handler = processor_json_html_plaintext.perform_site_check(datastore=self.datastore)
155	155
156	156		changed_detected, update_obj = update_handler.run(uuid)
		skipped 156 lines

WIP