STRLCPY/changedetection.io

WIP
dgtlmoon committed 2 years ago

0e0bd932

1 parent c5b0c198

■ ■ ■ ■ ■ ■

changedetectionio/__init__.py

		skipped 395 lines
396	396		existing_tags = datastore.get_all_tags()
397	397
398	398		form = forms.quickWatchForm(request.form)
	399	+	webdriver_enabled = True if os.getenv('PLAYWRIGHT_DRIVER_URL', False) or os.getenv('PLAYWRIGHT_DRIVER_URL', False) else False
	400	+
399	401		output = render_template("watch-overview.html",
400		-	form=form,
401		-	watches=sorted_watches,
402		-	tags=existing_tags,
403	402		active_tag=limit_tag,
404	403		app_rss_token=datastore.data['settings']['application']['rss_access_token'],
	404	+	form=form,
	405	+	guid=datastore.data['app_guid'],
405	406		has_unviewed=datastore.has_unviewed,
406		-	# Don't link to hosting when we're on the hosting environment
407	407		hosted_sticky=os.getenv("SALTED_PASS", False) == False,
408		-	guid=datastore.data['app_guid'],
409		-	queued_uuids=[uuid for p,uuid in update_q.queue])
410		-
	408	+	queued_uuids=[uuid for p, uuid in update_q.queue],
	409	+	tags=existing_tags,
	410	+	watches=sorted_watches,
	411	+	webdriver_enabled=webdriver_enabled
	412	+	)
411	413
412	414		if session.get('share-link'):
413	415		del(session['share-link'])
		skipped 814 lines
1228	1230		return redirect(url_for('index'))
1229	1231
1230	1232		url = request.form.get('url').strip()
1231		-	fetch_processor =request.form.get('fetch_processor').strip()
	1233	+
1232	1234		if datastore.url_exists(url):
1233	1235		flash('The URL {} already exists'.format(url), "error")
1234	1236		return redirect(url_for('index'))
1235	1237
1236	1238		add_paused = request.form.get('edit_and_watch_submit_button') != None
	1239	+	fetch_processor = request.form.get('fetch_processor')
	1240	+
	1241	+	extras = {'paused': add_paused}
	1242	+	if fetch_processor:
	1243	+	extras['fetch_processor']=fetch_processor
	1244	+	if fetch_processor == 'image':
	1245	+	extras['fetch_backend'] = 'html_webdriver'
	1246	+
1237	1247		new_uuid = datastore.add_watch(url=url,
1238	1248		tag=request.form.get('tag').strip(),
1239		-	extras={'paused': add_paused, 'fetch_processor': fetch_processor}
	1249	+	extras=extras
1240	1250		)
1241	1251
1242	1252
		skipped 359 lines

■ ■ ■ ■ ■ ■

changedetectionio/fetch_processor/__init__.py

1		-	available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Static Image'), ('rendered_webpage', 'Screenshot of page or element')]
	1	+	available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Graphically by image or web-page')]
2	2
3	3		class fetch_processor():
4	4		contents = b''
		skipped 39 lines

■ ■ ■ ■ ■ ■

changedetectionio/fetch_processor/image.py

		skipped 33 lines
34	34
35	35		watch = self.datastore.data['watching'].get(uuid)
36	36
	37	+
	38	+	if watch.get('fetch_backend') != 'html_webdriver':
	39	+	raise Exception(
	40	+	"Requires a Chrome compatible fetcher enabled."
	41	+	)
	42	+
37	43		# Protect against file:// access
38	44		if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
39	45		raise Exception(
		skipped 40 lines
80	86
81	87		update_obj["last_check_status"] = fetcher.get_last_status_code()
82	88
83		-	self.contents = fetcher.raw_content
	89	+	if 'image' in fetcher.headers['content-type']:
	90	+	self.contents = fetcher.raw_content
	91	+	else:
	92	+	self.contents = fetcher.screenshot
84	93
85		-	image = Image.open(io.BytesIO(fetcher.raw_content))
	94	+	image = Image.open(io.BytesIO(self.contents))
86	95
87	96		# @todo different choice?
88	97		# https://github.com/JohannesBuchner/imagehash#references
		skipped 15 lines

■ ■ ■ ■ ■ ■

changedetectionio/fetch_processor/rendered_webpage.py

1	-	import hashlib
2	-	import imagehash
3	-	from PIL import Image
4	-	import io
5	-	import logging
6	-	import os
7	-	import re
8	-	import time
9	-	import urllib3
10	-
11	-	# fetch processor for requesting and comparing a single image
12	-	# can use both requests and playwright/selenium
13	-
14	-	# - imagehash for change detection (or https://github.com/dgtlmoon/changedetection.io/pull/419/files#diff-7d3854710a6c0faead783f75850100a4c4b69409309200d3a83692dc9783bf6eR17 ?)
15	-	# - skimage.metrics import structural_similarity for viewing the diff
16	-
17	-
18	-	from changedetectionio import content_fetcher, html_tools
19	-
20	-	urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
21	-
22	-	from . import fetch_processor
23	-
24	-
25	-	# Some common stuff here that can be moved to a base class
26	-	# (set_proxy_from_list)
27	-	class perform_site_check(fetch_processor):
28	-	xpath_data = None
29	-
30	-	def run(self, uuid):
31	-	changed_detected = False
32	-
33	-	watch = self.datastore.data['watching'].get(uuid)
34	-
35	-	# Protect against file:// access
36	-	if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
37	-	raise Exception(
38	-	"file:// type access is denied for security reasons."
39	-	)
40	-
41	-	if watch.get('fetch_backend') != 'html_webdriver':
42	-	raise Exception(
43	-	"Requires a Chrome compatible fetcher enabled."
44	-	)
45	-
46	-	# Unset any existing notification error
47	-	update_obj = {'last_notification_error': False, 'last_error': False}
48	-
49	-	extra_headers = self.datastore.data['watching'][uuid].get('headers')
50	-
51	-	# Tweak the base config with the per-watch ones
52	-	request_headers = self.datastore.data['settings']['headers'].copy()
53	-	request_headers.update(extra_headers)
54	-
55	-	# https://github.com/psf/requests/issues/4525
56	-	# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
57	-	# do this by accident.
58	-	if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
59	-	request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
60	-
61	-	timeout = self.datastore.data['settings']['requests']['timeout']
62	-	url = watch.get('url')
63	-	request_body = self.datastore.data['watching'][uuid].get('body')
64	-	request_method = self.datastore.data['watching'][uuid].get('method')
65	-	ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
66	-
67	-	prefer_backend = watch['fetch_backend']
68	-	if hasattr(content_fetcher, prefer_backend):
69	-	klass = getattr(content_fetcher, prefer_backend)
70	-	else:
71	-	# If the klass doesnt exist, just use a default
72	-	klass = getattr(content_fetcher, "html_requests")
73	-
74	-	proxy_args = self.set_proxy_from_list(watch)
75	-	fetcher = klass(proxy_override=proxy_args)
76	-
77	-	fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes)
78	-	fetcher.quit()
79	-
80	-	# if not image/foobar in mimetype
81	-	# raise content_fecther.NotAnImage(mimetype) ?
82	-	# or better to try load with PIL and catch exception?
83	-
84	-	update_obj["last_check_status"] = fetcher.get_last_status_code()
85	-
86	-	self.contents = fetcher.screenshot
87	-
88	-	image = Image.open(io.BytesIO(fetcher.screenshot))
89	-
90	-	# @todo different choice?
91	-	# https://github.com/JohannesBuchner/imagehash#references
92	-	fetched_hash = str(imagehash.average_hash(image))
93	-
94	-	# The main thing that all this at the moment comes down to :)
95	-	if watch['previous_md5'] != fetched_hash:
96	-	changed_detected = True
97	-
98	-	# Always record the new checksum
99	-	update_obj["previous_md5"] = fetched_hash
100	-
101	-	# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
102	-	if not watch.get('previous_md5'):
103	-	watch['previous_md5'] = fetched_hash
104	-
105	-	return changed_detected, update_obj
106	-

■ ■ ■ ■ ■ ■

changedetectionio/templates/watch-overview.html

		skipped 14 lines
15	15		<div>
16	16		{{ render_simple_field(form.url, placeholder="https://...", required=true) }}
17	17		{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
	18	+	{% if webdriver_enabled %}
18	19		<br/>
19	20		{{ render_field(form.fetch_processor) }}
	21	+	{% endif %}
20	22		</div>
21	23		<div>
22	24		{{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
		skipped 140 lines

WIP