Merge branch 'master' into fetchers-abstract-graphic-compare

cmake?
Merge branch 'adding-test-webdriver_js_execute_code' into fetchers-abstract-graphic-compare
2025-11-02 23:57:22 +00:00 · 2022-10-11 15:12:16 +02:00 · 2022-10-11 15:03:56 +02:00 · 2022-10-11 14:26:12 +02:00 · 2022-10-11 14:21:52 +02:00 · 2022-10-11 13:52:48 +02:00
23 changed files with 774 additions and 98 deletions
--- a/1
+++ b/1
@@ -5,6 +5,7 @@ FROM python:3.8-slim as builder
 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1

 RUN apt-get update && apt-get install -y --no-install-recommends \
+    cmake \
    g++ \
    gcc \
    libc-dev \
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -396,18 +396,20 @@ def changedetection_app(config=None, datastore_o=None):
        existing_tags = datastore.get_all_tags()

        form = forms.quickWatchForm(request.form)
+        webdriver_enabled = True if os.getenv('PLAYWRIGHT_DRIVER_URL', False) or os.getenv('PLAYWRIGHT_DRIVER_URL', False) else False
+
        output = render_template("watch-overview.html",
-                                 form=form,
-                                 watches=sorted_watches,
-                                 tags=existing_tags,
                                 active_tag=limit_tag,
                                 app_rss_token=datastore.data['settings']['application']['rss_access_token'],
-                                 has_unviewed=datastore.has_unviewed,
-                                 # Don't link to hosting when we're on the hosting environment
-                                 hosted_sticky=os.getenv("SALTED_PASS", False) == False,
+                                 form=form,
                                 guid=datastore.data['app_guid'],
-                                 queued_uuids=[uuid for p,uuid in update_q.queue])
-
+                                 has_unviewed=datastore.has_unviewed,
+                                 hosted_sticky=os.getenv("SALTED_PASS", False) == False,
+                                 queued_uuids=[uuid for p, uuid in update_q.queue],
+                                 tags=existing_tags,
+                                 watches=sorted_watches,
+                                 webdriver_enabled=webdriver_enabled
+                                 )

        if session.get('share-link'):
            del(session['share-link'])
@@ -489,7 +491,7 @@ def changedetection_app(config=None, datastore_o=None):

        import hashlib

-        from changedetectionio import fetch_site_status
+        from .fetch_processor import json_html_plaintext

        # Get the most recent one
        newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
@@ -503,7 +505,7 @@ def changedetection_app(config=None, datastore_o=None):
                      encoding='utf-8') as file:
                raw_content = file.read()

-                handler = fetch_site_status.perform_site_check(datastore=datastore)
+                handler = json_html_plaintext.perform_site_check(datastore=datastore)
                stripped_content = html_tools.strip_ignore_text(raw_content,
                                                             datastore.data['watching'][uuid]['ignore_text'])

@@ -636,20 +638,31 @@ def changedetection_app(config=None, datastore_o=None):
            # Only works reliably with Playwright
            visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'

+            watch = datastore.data['watching'].get(uuid)
+
+            # Which tabs to show/hide ?
+            enabled_tabs = []
+            if watch.get('fetch_processor') == 'json_html_plaintext' or not watch.get('fetch_processor'):
+                enabled_tabs.append('visual-selector')
+                enabled_tabs.append('text-filters-and-triggers')
+
+            if watch.get('fetch_processor') == 'image':
+                enabled_tabs.append('visual-selector')

            output = render_template("edit.html",
-                                     uuid=uuid,
-                                     watch=datastore.data['watching'][uuid],
-                                     form=form,
-                                     has_empty_checktime=using_default_check_time,
-                                     has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
-                                     using_global_webdriver_wait=default['webdriver_delay'] is None,
                                     current_base_url=datastore.data['settings']['application']['base_url'],
                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
+                                     enabled_tabs = enabled_tabs,
+                                     form=form,
+                                     has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
+                                     has_empty_checktime=using_default_check_time,
+                                     playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
                                     settings_application=datastore.data['settings']['application'],
+                                     using_global_webdriver_wait=default['webdriver_delay'] is None,
+                                     uuid=uuid,
                                     visualselector_data_is_ready=visualselector_data_is_ready,
                                     visualselector_enabled=visualselector_enabled,
-                                     playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
+                                     watch=watch,
                                     )

        return output
@@ -782,6 +795,86 @@ def changedetection_app(config=None, datastore_o=None):

        return redirect(url_for('index'))

+
+    @app.route("/diff/image/<string:uuid>", methods=['GET'])
+    @login_required
+    def diff_image_history_page(uuid):
+        """Render the "diff-image.html" page for the stored snapshots of one watch.
+
+        :param uuid: key into datastore.data['watching'], or the literal 'first' (testing aid).
+        :return: the rendered template, or a redirect to 'index' with a flashed error when
+                 the watch is unknown or it has fewer than two history entries.
+        """
+
+        # More for testing, possible to return the first/only
+        # NOTE(review): list.pop() returns the LAST key, so this is only "the first" when
+        # exactly one watch exists, and it raises IndexError when there are no watches.
+        if uuid == 'first':
+            uuid = list(datastore.data['watching'].keys()).pop()
+
+        extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
+        try:
+            watch = datastore.data['watching'][uuid]
+        except KeyError:
+            flash("No history found for the specified link, bad link?", "error")
+            return redirect(url_for('index'))
+
+        history = watch.history
+        dates = list(history.keys())
+
+        # A comparison needs at least a previous and a newest entry
+        if len(dates) < 2:
+            flash("Not enough saved change detection snapshots to produce a report.", "error")
+            return redirect(url_for('index'))
+
+        # Second-to-last key is offered as the default "previous" version
+        previous_version = dates[-2]
+
+        # Stamp the watch as viewed at the current time
+        datastore.set_last_viewed(uuid, time.time())
+
+        output = render_template("diff-image.html",
+                                 watch=watch,
+                                 extra_stylesheets=extra_stylesheets,
+                                 versions=dates[:-1], # All except current/last
+                                 uuid=uuid,
+                                 newest_version_timestamp=dates[-1],
+                                 current_previous_version=str(previous_version),
+                                 current_diff_url=watch['url'],
+                                 extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
+                                 left_sticky=True,
+                                 last_error=watch['last_error'],
+                                 last_error_text=watch.get_error_text(),
+                                 last_error_screenshot=watch.get_error_snapshot()
+                                 )
+        return output
+
+
+    @app.route("/preview/image/<string:uuid>", methods=['GET'])
+    @login_required
+    def preview_image_history_page(uuid):
+        """Render the "preview-image.html" page for one watch.
+
+        :param uuid: key into datastore.data['watching'], or the literal 'first' (testing aid).
+        :return: the rendered template, or a redirect to 'index' with a flashed error when
+                 the watch is unknown or it has no history entries at all.
+        """
+
+        # More for testing, possible to return the first/only
+        # NOTE(review): list.pop() returns the LAST key, so this is only "the first" when
+        # exactly one watch exists, and it raises IndexError when there are no watches.
+        if uuid == 'first':
+            uuid = list(datastore.data['watching'].keys()).pop()
+
+        extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
+        try:
+            watch = datastore.data['watching'][uuid]
+        except KeyError:
+            flash("No history found for the specified link, bad link?", "error")
+            return redirect(url_for('index'))
+
+        history = watch.history
+        dates = list(history.keys())
+
+        # Unlike the diff page, a single stored snapshot is enough for a preview
+        if len(dates) < 1:
+            flash("Not enough saved change detection snapshots to produce a report.", "error")
+            return redirect(url_for('index'))
+
+        # NOTE(review): extra_title below still says "Diff" although this is the preview page - confirm intended.
+        output = render_template("preview-image.html",
+                                 watch=watch,
+                                 extra_stylesheets=extra_stylesheets,
+                                 uuid=uuid,
+                                 current_diff_url=watch['url'],
+                                 newest_history_key = watch.newest_history_key,
+                                 extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
+                                 left_sticky=True,
+                                 last_error=watch['last_error'],
+                                 last_error_text=watch.get_error_text(),
+                                 last_error_screenshot=watch.get_error_snapshot()
+                                 )
+        return output
+
    @app.route("/diff/<string:uuid>", methods=['GET'])
    @login_required
    def diff_history_page(uuid):
@@ -947,6 +1040,67 @@ def changedetection_app(config=None, datastore_o=None):

        return output

+    @app.route("/preview/image/<string:uuid>/<string:history_timestamp>")
+    @login_required
+    def render_single_image(uuid, history_timestamp):
+        """Serve one stored snapshot image of a watch, with caching disabled.
+
+        :param uuid: key into datastore.data['watching'].
+        :param history_timestamp: key into the watch history; empty or the literal
+            string 'None' selects the previous snapshot (or the only one when there
+            is no previous snapshot).
+        :return: a response carrying the raw image bytes; aborts with 404 when the
+            watch, its history, or the requested timestamp does not exist.
+        """
+        import mimetypes
+        from flask import abort
+
+        watch = datastore.data['watching'].get(uuid)
+        if watch is None:
+            abort(404)
+
+        dates = list(watch.history.keys())
+        if not dates:
+            abort(404)
+
+        if not history_timestamp or history_timestamp == 'None':
+            # dates[-2] alone would raise IndexError for a watch with a single snapshot
+            history_timestamp = dates[-2] if len(dates) > 1 else dates[-1]
+        elif history_timestamp not in dates:
+            abort(404)
+
+        filename = watch.history[history_timestamp]
+        with open(filename, 'rb') as f:
+            img = f.read()
+
+        # Stored artifacts are not always PNG, so label the response from the file
+        # suffix and keep the previous 'image/png' when the suffix says nothing useful.
+        guessed_type = mimetypes.guess_type(filename)[0]
+        content_type = guessed_type if guessed_type and guessed_type.startswith('image/') else 'image/png'
+
+        response = make_response(img)
+
+        response.headers['Content-type'] = content_type
+        response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
+        response.headers['Pragma'] = 'no-cache'
+        response.headers['Expires'] = 0
+
+        return response
+
+
+
+    # Diff renderer for images
+    # Renders the diff which includes the red box around what changes
+    # We always compare the newest against whatever compare_date we are given
+    @app.route("/diff/image/<string:uuid>/<string:compare_date>")
+    @login_required
+    def render_diff_image(uuid, compare_date):
+        """Serve an image showing the newest snapshot with the changed regions outlined.
+
+        :param uuid: key into datastore.data['watching'].
+        :param compare_date: history key to compare the newest snapshot against; empty
+            or the literal string 'None' selects the second-to-last snapshot.
+        :return: a response with the rendered diff; when rendering raises ValueError
+            the newest snapshot is served unmodified instead. Redirects to 'index'
+            when fewer than two snapshots exist; aborts with 404 when the watch or
+            the requested compare_date does not exist.
+        """
+        import mimetypes
+
+        from changedetectionio import image_diff
+
+        from flask import abort, make_response
+
+        watch = datastore.data['watching'].get(uuid)
+        if watch is None:
+            abort(404)
+
+        dates = list(watch.history.keys())
+        if len(dates) < 2:
+            flash("Not enough saved change detection snapshots to produce a report.", "error")
+            return redirect(url_for('index'))
+
+        if not compare_date or compare_date == 'None':
+            compare_date = dates[-2]
+        elif compare_date not in dates:
+            # An unknown timestamp would otherwise surface as a KeyError (HTTP 500)
+            abort(404)
+
+        new_img = watch.history[watch.newest_history_key]
+        prev_img = watch.history[compare_date]
+
+        # render_diff() always returns JPEG-encoded bytes
+        content_type = 'image/jpeg'
+        try:
+            img = image_diff.render_diff(new_img, prev_img)
+        except ValueError as e:
+            print ("EXCEPTION: Diff image - got exception {} reverting to raw image without rendering difference".format(str(e)))
+            with open(new_img, 'rb') as f:
+                img = f.read()
+            # The raw file is not necessarily JPEG, so label it from its suffix when possible
+            guessed_type = mimetypes.guess_type(new_img)[0]
+            if guessed_type and guessed_type.startswith('image/'):
+                content_type = guessed_type
+
+        resp = make_response(img)
+        resp.headers['Content-Type'] = content_type
+        return resp
+
+
+
    @app.route("/settings/notification-logs", methods=['GET'])
    @login_required
    def notification_logs():
@@ -1095,12 +1249,24 @@ def changedetection_app(config=None, datastore_o=None):
            return redirect(url_for('index'))

        url = request.form.get('url').strip()
+
        if datastore.url_exists(url):
            flash('The URL {} already exists'.format(url), "error")
            return redirect(url_for('index'))

        add_paused = request.form.get('edit_and_watch_submit_button') != None
-        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
+        fetch_processor = request.form.get('fetch_processor')
+
+        extras = {'paused': add_paused}
+        if fetch_processor:
+            extras['fetch_processor']=fetch_processor
+            if fetch_processor == 'image':
+                extras['fetch_backend'] = 'html_webdriver'
+
+        new_uuid = datastore.add_watch(url=url,
+                                       tag=request.form.get('tag').strip(),
+                                       extras=extras
+                                       )


        if not add_paused and new_uuid:
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -21,7 +21,6 @@ class Non200ErrorCodeReceived(Exception):
            self.page_text = html_tools.html_to_text(page_html)
        return

-
 class JSActionExceptions(Exception):
    def __init__(self, status_code, url, screenshot, message=''):
        self.status_code = status_code
@@ -66,13 +65,14 @@ class ReplyWithContentButNoText(Exception):
        return

 class Fetcher():
-    error = None
-    status_code = None
    content = None
-    headers = None
-
+    error = None
    fetcher_description = "No description"
+    headers = None
+    raw_content = None
+    status_code = None
    webdriver_js_execute_code = None
+
    xpath_element_js = """               
                // Include the getXpath script directly, easier than fetching
                !function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
@@ -202,6 +202,7 @@ class Fetcher():

    # Will be needed in the future by the VisualSelector, always get this where possible.
    screenshot = False
+    element_screenshot = None
    system_http_proxy = os.getenv('HTTP_PROXY')
    system_https_proxy = os.getenv('HTTPS_PROXY')

@@ -310,7 +311,8 @@ class base_html_playwright(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_css_filter=None
+            ):

        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types
@@ -410,11 +412,18 @@ class base_html_playwright(Fetcher):
            page.wait_for_timeout(500)

            self.content = page.content()
+            self.raw_content = page.content()
+
            self.status_code = response.status
            self.headers = response.all_headers()

-            if current_css_filter is not None:
+            if current_css_filter is not None and len(current_css_filter):
                page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
+
+                el = page.locator(current_css_filter)
+                if el:
+                    el.scroll_into_view_if_needed()
+                    self.element_screenshot = el.screenshot()
            else:
                page.evaluate("var css_filter=''")

@@ -429,9 +438,9 @@ class base_html_playwright(Fetcher):
            # acceptable screenshot quality here
            try:
                # Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
-                page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
+                #page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
                # The actual screenshot
-                self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
+                self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 82)))
            except Exception as e:
                context.close()
                browser.close()
@@ -533,6 +542,7 @@ class base_html_webdriver(Fetcher):
        # @todo - dom wait loaded?
        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
        self.content = self.driver.page_source
+        self.raw_content = self.driver.page_source
        self.headers = {}

        self.screenshot = self.driver.get_screenshot_as_png()
@@ -619,6 +629,7 @@ class html_requests(Fetcher):

        self.status_code = r.status_code
        self.content = r.text
+        self.raw_content = r.content
        self.headers = r.headers


--- a/changedetectionio/fetch_processor/init.py
+++ b/changedetectionio/fetch_processor/init.py
@@ -0,0 +1,12 @@
+available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Graphically by image or web-page')]
+
+class fetch_processor():
+    """
+    Base class for all fetch processors
+    - json_html_plaintext
+    - image
+    """
+    # Result of the last run(), assigned by the subclass (image bytes or extracted text)
+    contents = b''
+    # Screenshot taken by the fetcher during the last run(), if any
+    screenshot = None
+    # Application datastore, assigned by the subclass constructor
+    datastore = None
--- a/changedetectionio/fetch_processor/image.py
+++ b/changedetectionio/fetch_processor/image.py
@@ -0,0 +1,130 @@
+import hashlib
+import imagehash
+from PIL import Image
+import io
+import logging
+import os
+import re
+import time
+import urllib3
+
+# fetch processor for requesting and comparing a single image
+# can use both requests and playwright/selenium
+
+# - imagehash for change detection (or https://github.com/dgtlmoon/changedetection.io/pull/419/files#diff-7d3854710a6c0faead783f75850100a4c4b69409309200d3a83692dc9783bf6eR17 ?)
+# - skimage.metrics import structural_similarity for viewing the diff
+
+
+from changedetectionio import content_fetcher, html_tools
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+from . import fetch_processor
+
+
+# Some common stuff here that can be moved to a base class
+# (set_proxy_from_list)
+class perform_site_check(fetch_processor):
+    """Fetch processor that detects changes by comparing an average-hash of an image.
+
+    The hashed image is the fetched resource itself when the response declares an
+    image content-type, otherwise the element screenshot (when a CSS filter produced
+    one) or the full-page screenshot taken by the fetcher.
+    """
+    xpath_data = None
+
+    def __init__(self, *args, datastore, **kwargs):
+        self.datastore = datastore
+        super().__init__(*args, **kwargs)
+
+    def run(self, uuid):
+        """Fetch the watch identified by `uuid` and compare its image hash to the stored one.
+
+        Side effects: sets self.contents (the image bytes that were hashed),
+        self.screenshot and self.xpath_data from the fetcher.
+
+        :param uuid: key into datastore.data['watching'].
+        :return: tuple (changed_detected, update_obj) where update_obj holds the
+            fields to store on the watch ('last_check_status', 'previous_md5', and
+            the reset 'last_error' / 'last_notification_error' flags).
+        :raises Exception: when the watch is not using the 'html_webdriver' backend,
+            or when a file:// URL is used without ALLOW_FILE_URI being set.
+        """
+        changed_detected = False
+
+        watch = self.datastore.data['watching'].get(uuid)
+
+        if watch.get('fetch_backend') != 'html_webdriver':
+            raise Exception(
+                "Requires a Chrome compatible fetcher enabled."
+            )
+
+        # Protect against file:// access
+        if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
+            raise Exception(
+                "file:// type access is denied for security reasons."
+            )
+
+        # Unset any existing notification error
+        update_obj = {'last_notification_error': False, 'last_error': False}
+
+        # Guard against a watch whose 'headers' was stored as None
+        extra_headers = watch.get('headers') or {}
+
+        # Tweak the base config with the per-watch ones
+        request_headers = self.datastore.data['settings']['headers'].copy()
+        request_headers.update(extra_headers)
+
+        # https://github.com/psf/requests/issues/4525
+        # Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
+        # do this by accident.
+        if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
+            request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
+
+        timeout = self.datastore.data['settings']['requests']['timeout']
+        url = watch.get('url')
+        request_body = watch.get('body')
+        request_method = watch.get('method')
+        ignore_status_codes = watch.get('ignore_status_codes', False)
+
+        prefer_backend = watch['fetch_backend']
+        if hasattr(content_fetcher, prefer_backend):
+            klass = getattr(content_fetcher, prefer_backend)
+        else:
+            # If the klass doesn't exist, just use a default
+            klass = getattr(content_fetcher, "html_requests")
+
+        proxy_args = self.datastore.get_preferred_proxy_for_watch(uuid)
+        fetcher = klass(proxy_override=proxy_args)
+
+        fetcher.run(
+            ignore_status_codes=ignore_status_codes,
+            request_body=request_body,
+            request_headers=request_headers,
+            request_method=request_method,
+            current_css_filter=watch.get('css_filter'),
+            timeout=timeout,
+            url=url
+        )
+
+        fetcher.quit()
+
+        # if not image/foobar in mimetype
+        # raise content_fetcher.NotAnImage(mimetype) ?
+        # or better to try load with PIL and catch exception?
+
+        update_obj["last_check_status"] = fetcher.get_last_status_code()
+
+        # Fetchers differ in header-name casing and may return no headers at all,
+        # so look the header up case-insensitively and default to "not an image".
+        content_type = next(
+            (value for name, value in (fetcher.headers or {}).items() if name.lower() == 'content-type'),
+            ''
+        )
+
+        if 'image' in content_type:
+            self.contents = fetcher.raw_content
+        else:
+            self.contents = fetcher.element_screenshot if fetcher.element_screenshot else fetcher.screenshot
+
+        # Used for visual-selector
+        self.xpath_data = fetcher.xpath_data
+        self.screenshot = fetcher.screenshot
+
+        now = time.time()
+        image = Image.open(io.BytesIO(self.contents))
+
+        # @todo different choice?
+        # https://github.com/JohannesBuchner/imagehash#references
+        fetched_hash = str(imagehash.average_hash(image))
+        print(uuid, "Time to image hash", time.time() - now)
+
+        # The main thing that all this at the moment comes down to :)
+        if watch['previous_md5'] != fetched_hash:
+            changed_detected = True
+
+        # Always record the new checksum
+        update_obj["previous_md5"] = fetched_hash
+
+        # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
+        if not watch.get('previous_md5'):
+            watch['previous_md5'] = fetched_hash
+
+        return changed_detected, update_obj
--- a/changedetectionio/fetch_processor/json_html_plaintext.py
+++ b/changedetectionio/fetch_processor/json_html_plaintext.py
@@ -9,16 +9,18 @@ from changedetectionio import content_fetcher, html_tools

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

+from . import fetch_processor

 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
-class perform_site_check():
+class perform_site_check(fetch_processor):
    screenshot = None
    xpath_data = None

    def __init__(self, *args, datastore, **kwargs):
-        super().__init__(*args, **kwargs)
        self.datastore = datastore
+        super().__init__(*args, **kwargs)
+

    # Doesn't look like python supports forward slash auto enclosure in re.findall
    # So convert it to inline flag "foobar(?i)" type configuration
@@ -294,4 +296,6 @@ class perform_site_check():
        if not watch.get('previous_md5'):
            watch['previous_md5'] = fetched_md5

-        return changed_detected, update_obj, text_content_before_ignored_filter
+        self.contents = text_content_before_ignored_filter
+
+        return changed_detected, update_obj
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -321,8 +321,11 @@ class ValidateCSSJSONXPATHInput(object):


 class quickWatchForm(Form):
+    from . import fetch_processor
+
    url = fields.URLField('URL', validators=[validateURL()])
    tag = StringField('Group tag', [validators.Optional()])
+    fetch_processor = RadioField(u'Compare as', choices=fetch_processor.available_fetchers, default=fetch_processor.available_fetchers[0][0])
    watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
    edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})

--- a/changedetectionio/image_diff.py
+++ b/changedetectionio/image_diff.py
@@ -0,0 +1,44 @@
+from skimage.metrics import structural_similarity as compare_ssim
+import argparse
+import imutils
+import cv2
+
+# From https://www.pyimagesearch.com/2017/06/19/image-difference-with-opencv-and-python/
+def render_diff(fpath_imageA, fpath_imageB):
+	"""Return imageA as JPEG bytes with a box drawn around each region that differs from imageB.
+
+	:param fpath_imageA: path of the image that is annotated and returned.
+	:param fpath_imageB: path of the image it is compared against.
+	:return: JPEG-encoded bytes of the annotated imageA.
+	:raises ValueError: when either file cannot be read as an image; the SSIM
+		comparison may also raise it (e.g. for images of different dimensions).
+	"""
+
+	import time
+	now = time.time()
+
+	imageA = cv2.imread(fpath_imageA)
+	imageB = cv2.imread(fpath_imageB)
+
+	# cv2.imread() signals failure by returning None instead of raising; turn that
+	# into the ValueError callers already handle rather than a cv2.error further down.
+	if imageA is None or imageB is None:
+		raise ValueError("Unable to read image for comparison: {}".format(
+			fpath_imageA if imageA is None else fpath_imageB))
+
+	# convert the images to grayscale
+	grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
+	grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
+
+	# compute the Structural Similarity Index (SSIM) between the two
+	# images, ensuring that the difference image is returned
+	(score, diff) = compare_ssim(grayA, grayB, full=True)
+	diff = (diff * 255).astype("uint8")
+	print("SSIM: {}".format(score))
+
+	# threshold the difference image, followed by finding contours to
+	# obtain the regions of the two input images that differ
+	thresh = cv2.threshold(diff, 0, 255,
+		cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
+	cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
+		cv2.CHAIN_APPROX_SIMPLE)
+	cnts = imutils.grab_contours(cnts)
+
+	# loop over the contours
+	for c in cnts:
+		# compute the bounding box of the contour and draw it on the image
+		# that is returned (only imageA is returned, so imageB is left untouched)
+		(x, y, w, h) = cv2.boundingRect(c)
+		cv2.rectangle(imageA, (x, y), (x + w, y + h), (0, 0, 255), 1)
+
+	print ("Image comparison processing time", time.time()-now)
+	return cv2.imencode('.jpg', imageA)[1].tobytes()
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -14,42 +14,43 @@ class model(dict):
    __newest_history_key = None
    __history_n=0
    __base_config = {
-            'url': None,
-            'tag': None,
-            'last_checked': 0,
-            'paused': False,
-            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
-            #'newest_history_key': 0,
-            'title': None,
-            'previous_md5': False,
-            'uuid': str(uuid_builder.uuid4()),
-            'headers': {},  # Extra headers to send
-            'body': None,
-            'method': 'GET',
-            #'history': {},  # Dict of timestamp and output stripped filename
-            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
            # Custom notification content
-            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
-            'notification_title': None,
-            'notification_body': None,
-            'notification_format': default_notification_format_for_watch,
-            'notification_muted': False,
-            'css_filter': '',
-            'last_error': False,
-            'extract_text': [],  # Extract text by regex after filters
-            'subtractive_selectors': [],
-            'trigger_text': [],  # List of text or regex to wait for until a change is detected
-            'text_should_not_be_present': [], # Text that should not present
-            'fetch_backend': None,
-            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
-            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
-            'extract_title_as_title': False,
-            'check_unique_lines': False, # On change-detected, compare against all history if its something new
-            'proxy': None, # Preferred proxy connection
            # Re #110, so then if this is set to None, we know to use the default value instead
            # Requires setting to None on submit if it's the same as the default
            # Should be all None by default, so we use the system default in this case.
+            #'history': {},  # Dict of timestamp and output stripped filename
+            #'newest_history_key': 0,
+            'body': None,
+            'check_unique_lines': False, # On change-detected, compare against all history if its something new
+            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
+            'css_filter': '',
+            'extract_text': [],  # Extract text by regex after filters
+            'extract_title_as_title': False,
+            'fetch_backend': None,
+            'fetch_processor': 'json_html_plaintext', # json_html_plaintext, image, rendered_webpage
+            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
+            'headers': {},  # Extra headers to send
+            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
+            'last_checked': 0,
+            'last_error': False,
+            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
+            'method': 'GET',
+            'notification_body': None,
+            'notification_format': default_notification_format_for_watch,
+            'notification_muted': False,
+            'notification_title': None,
+            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
+            'paused': False,
+            'previous_md5': False,
+            'proxy': None, # Preferred proxy connection
+            'subtractive_selectors': [],
+            'tag': None,
+            'text_should_not_be_present': [], # Text that should not present
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
+            'title': None,
+            'trigger_text': [],  # List of text or regex to wait for until a change is detected
+            'url': None,
+            'uuid': str(uuid_builder.uuid4()),
            'webdriver_delay': None,
            'webdriver_js_execute_code': None, # Run before change-detection
        }
@@ -145,18 +146,25 @@ class model(dict):
        bump = self.history
        return self.__newest_history_key

-    # Save some text file to the appropriate path and bump the history
-    # result_obj from fetch_site_status.run()
-    def save_history_text(self, contents, timestamp):
+    def save_history_artifact(self, contents: bytes, timestamp):
        import uuid
        import logging
+        import magic
+        import re
+        suffix = 'bin'
+        # detect extension type
+        mtype = magic.from_buffer(contents, mime=True)
+        if mtype:
+            r = re.search(r'image/(\w+)', mtype, re.IGNORECASE)
+            if r:
+                suffix = r.group(1)

        output_path = "{}/{}".format(self.__datastore_path, self['uuid'])

        self.ensure_data_dir_exists()

-        snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
-        logging.debug("Saving history text {}".format(snapshot_fname))
+        snapshot_fname = "{}/{}.{}".format(output_path, uuid.uuid4(), suffix)
+        logging.debug("Saving history artifact {}".format(snapshot_fname))

        with open(snapshot_fname, 'wb') as f:
            f.write(contents)
--- a/changedetectionio/static/images/picture-frame.svg
+++ b/changedetectionio/static/images/picture-frame.svg
@@ -0,0 +1,149 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Generator: Adobe Illustrator 19.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
+
+<svg
+   version="1.1"
+   id="Layer_1"
+   x="0px"
+   y="0px"
+   viewBox="0 0 20.745352 20.745251"
+   xml:space="preserve"
+   width="20.745352"
+   height="20.745251"
+   sodipodi:docname="picture-frame.svg"
+   inkscape:version="1.1.1 (1:1.1+202109281949+c3084ef5ed)"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg"><sodipodi:namedview
+   id="namedview31"
+   pagecolor="#ffffff"
+   bordercolor="#666666"
+   borderopacity="1.0"
+   inkscape:pageshadow="2"
+   inkscape:pageopacity="0.0"
+   inkscape:pagecheckerboard="0"
+   showgrid="false"
+   fit-margin-top="0"
+   fit-margin-left="0"
+   fit-margin-right="0"
+   fit-margin-bottom="0"
+   inkscape:zoom="24.215073"
+   inkscape:cx="11.810825"
+   inkscape:cy="10.158962"
+   inkscape:window-width="1920"
+   inkscape:window-height="1056"
+   inkscape:window-x="1920"
+   inkscape:window-y="0"
+   inkscape:window-maximized="1"
+   inkscape:current-layer="g1325" /><defs
+   id="defs57">
+		
+		
+	</defs>
+<g
+   id="g22"
+   transform="translate(-141.68664,-143.32441)">
+	
+	
+	
+	
+	<g
+   id="g986"
+   transform="matrix(0.09174031,0,0,0.09174031,139.41786,139.41786)"><g
+     id="g1313" /><g
+     id="g18">
+			
+			
+			<g
+   id="g1325"
+   transform="matrix(1.0989302,0,0,1.0989302,-30.889712,-13.037446)"><g
+     id="g1413"><rect
+       x="58.112999"
+       y="58.112999"
+       style="fill:#95e1d3"
+       width="190.77765"
+       height="190.77299"
+       id="rect4"
+       rx="0"
+       ry="0" /><polygon
+       style="fill:#eaffd0"
+       points="117.389,248.887 183.138,135.007 248.887,248.887 "
+       id="polygon6" /><polygon
+       style="fill:#eaffd0"
+       points="100.26,175.887 58.113,248.887 117.389,248.887 129.898,227.221 "
+       id="polygon8" /><circle
+       style="fill:#fce38a"
+       cx="141.82001"
+       cy="119.433"
+       r="16.547001"
+       id="circle10" /><path
+       style="fill:#414042"
+       d="M 248.887,50.613 H 58.113 c -4.142,0 -7.5,3.357 -7.5,7.5 v 190.773 c 0,4.118 3.362,7.5 7.5,7.5 h 59.276 131.498 c 4.06,0 7.5,-3.304 7.5,-7.5 V 58.113 c 0,-4.142 -3.358,-7.5 -7.5,-7.5 z m -7.5,15 v 155.283 l -51.754,-89.64 c -2.886,-4.998 -10.11,-4.988 -12.99,0 l -46.745,80.965 -23.143,-40.085 c -2.886,-4.998 -10.11,-4.988 -12.99,0 l -28.151,48.76 V 65.613 Z m -141.127,125.274 20.978,36.335 -7.823,13.549 -0.356,0.616 H 71.103 Z m 30.12,50.5 6.013,-10.415 c 0.001,-0.002 0.002,-0.004 0.003,-0.006 l 46.742,-80.959 52.759,91.38 z"
+       id="path14" /><path
+       style="fill:#414042"
+       d="m 141.82,143.48 c 13.259,0 24.046,-10.787 24.046,-24.047 0,-13.26 -10.787,-24.047 -24.046,-24.047 -13.259,0 -24.046,10.787 -24.046,24.047 0,13.26 10.786,24.047 24.046,24.047 z m 0,-33.093 c 4.988,0 9.046,4.059 9.046,9.047 0,4.988 -4.058,9.047 -9.046,9.047 -4.988,0 -9.046,-4.059 -9.046,-9.047 -0.001,-4.989 4.057,-9.047 9.046,-9.047 z"
+       id="path16" /></g></g>
+		</g></g>
+</g>
+<g
+   id="g24"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g26"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g28"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g30"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g32"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g34"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g36"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g38"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g40"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g42"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g44"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g46"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g48"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g50"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+<g
+   id="g52"
+   transform="translate(-141.68664,-143.32441)">
+</g>
+</svg>
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -578,3 +578,15 @@ ul {
    display: inline;
    height: 26px;
    vertical-align: middle; }
+
+#quickwatch-fetch-processor {
+  color: #fff;
+  font-size: 80%; }
+  #quickwatch-fetch-processor ul {
+    padding: 0px;
+    list-style-type: none; }
+    #quickwatch-fetch-processor ul li {
+      display: inline-block;
+      margin-right: 1em; }
+      #quickwatch-fetch-processor ul li label:hover {
+        cursor: pointer; }
--- a/changedetectionio/static/styles/styles.scss
+++ b/changedetectionio/static/styles/styles.scss
@@ -803,4 +803,24 @@ ul {
  padding: 0.5rem;
  border-radius: 5px;
  color: #ff3300;
-}
+}
+
+#quickwatch-fetch-processor {
+  color: #fff;
+  font-size: 80%;
+
+  ul {
+    padding: 0px;
+    list-style-type: none;
+    li {
+      display: inline-block;
+      margin-right: 1em;
+      label {
+            &:hover {
+        cursor: pointer;
+      }
+      }
+    }
+  }
+}
+
--- a/changedetectionio/templates/diff-image.html
+++ b/changedetectionio/templates/diff-image.html
@@ -0,0 +1,64 @@
+{% extends 'base.html' %}
+
+{% block content %}
+
+<div id="settings">
+    <h1>Differences</h1>
+    <form class="pure-form " action="" method="GET">
+        <fieldset>
+            {% if versions|length >= 1 %}
+            <label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
+            <select id="diff-version" name="previous_version">
+                {% for version in versions %}
+                <option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
+                    {{version}}
+                </option>
+                {% endfor %}
+            </select>
+            <button type="submit" class="pure-button pure-button-primary">Go</button>
+            {% endif %}
+        </fieldset>
+    </form>
+
+</div>
+
+<div id="diff-ui">
+    <script
+  defer
+  src="https://unpkg.com/img-comparison-slider@7/dist/index.js"
+></script>
+<link
+  rel="stylesheet"
+  href="https://unpkg.com/img-comparison-slider@7/dist/styles.css"
+/>
+
+<img-comparison-slider>
+  <img  slot="first"  src="{{ url_for('render_diff_image', uuid=uuid, compare_date=current_previous_version) }}" />
+    <img  slot="second" src="{{ url_for('render_single_image', uuid=uuid, history_timestamp=current_previous_version) }}" />
+
+</img-comparison-slider>
+
+</div>
+
+
+<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.js')}}"></script>
+
+<script defer="">
+window.onload = function() {
+    /* Set current version date as local time in the browser also */
+    var current_v = document.getElementById("current-v-date");
+    var dateObject = new Date({{ newest_version_timestamp }}*1000);
+    current_v.innerHTML=dateObject.toLocaleString();
+
+    /* Convert each option's value (a UTC time.time() timestamp) to local browser time */
+    var diffList=document.getElementById("diff-version");
+    if (typeof(diffList) != 'undefined' && diffList != null) {
+        for (var option of diffList.options) {
+          var dateObject = new Date(option.value*1000);
+          option.label=dateObject.toLocaleString();
+        }
+    }
+}
+</script>
+
+{% endblock %}
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -25,7 +25,9 @@
        <ul>
            <li class="tab" id=""><a href="#general">General</a></li>
            <li class="tab"><a href="#request">Request</a></li>
+            {% if 'visual-selector' in enabled_tabs %}
            <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
+            {%endif%}
            <li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
            <li class="tab"><a href="#notifications">Notifications</a></li>
        </ul>
@@ -155,6 +157,7 @@ User-Agent: wonderbra 1.0") }}
            </div>

            <div class="tab-pane-inner" id="filters-and-triggers">
+                {% if 'text-filters-and-triggers' in enabled_tabs %}
                    <div class="pure-control-group">
                            <strong>Pro-tips:</strong><br/>
                            <ul>
@@ -166,12 +169,14 @@ User-Agent: wonderbra 1.0") }}
                                </li>
                            </ul>
                    </div>
+
                    <fieldset>
                        <div class="pure-control-group">
                            {{ render_checkbox_field(form.check_unique_lines) }}
                            <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
                        </div>
                    </fieldset>
+                {% endif %}
                    <div class="pure-control-group">
                        {% set field = render_field(form.css_filter,
                            placeholder=".class-name or #some-id, or other CSS selector rule.",
@@ -202,6 +207,9 @@ User-Agent: wonderbra 1.0") }}
                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
                </span>
                    </div>
+
+                            {% if 'text-filters-and-triggers' in enabled_tabs %}
+
                    <div class="pure-control-group">
                      {{ render_field(form.subtractive_selectors, rows=5, placeholder="header
 footer
@@ -277,6 +285,8 @@ Unavailable") }}
                        </span>
                    </div>
                </fieldset>
+
+                {% endif %}
            </div>

            <div class="tab-pane-inner visual-selector-ui" id="visualselector">
--- a/changedetectionio/templates/preview-image.html
+++ b/changedetectionio/templates/preview-image.html
@@ -0,0 +1,11 @@
+{% extends 'base.html' %}
+{% block content %}
+<div id="settings">
+    <h1>Preview</h1>
+ </div>
+
+<div id="diff-ui">
+    <img style="max-width: 100%" src="{{ url_for('render_single_image', uuid=uuid, history_timestamp=newest_history_key) }}" />
+</div>
+
+{% endblock %}
--- a/changedetectionio/templates/watch-overview.html
+++ b/changedetectionio/templates/watch-overview.html
@@ -15,10 +15,18 @@
                <div>
                    {{ render_simple_field(form.url, placeholder="https://...", required=true) }}
                    {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
+                    <span>
+                        {{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
+                        {{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
+                    </span>
+                    {% if webdriver_enabled %}
+                    <div id="quickwatch-fetch-processor">
+                        {{ render_field(form.fetch_processor) }}
+                    </div>
+                    {% endif %}
                </div>
                <div>
-                    {{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
-                    {{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
+
                </div>
            </div>
        </fieldset>
@@ -87,11 +95,11 @@
                    <a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
                </td>
                <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
-                    <a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
-                    <a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>
-
-                    {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
+                    <a class="external" title="Open in new window" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
+                    <a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" alt="Share" title="Share"/></a>

+                    {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Fetching with Chrome" title="Fetching with Chrome"/>{% endif %}
+                    {%if watch.fetch_processor == "image" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='picture-frame.svg')}}" alt="Comparing graphically" title="Comparing graphically"/>{% endif %}
                    {% if watch.last_error is defined and watch.last_error != False %}
                    <div class="fetch-error">{{ watch.last_error }}</div>
                    {% endif %}
@@ -114,10 +122,20 @@
                       class="recheck pure-button button-small pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
                    <a href="{{ url_for('edit_page', uuid=watch.uuid)}}" class="pure-button button-small pure-button-primary">Edit</a>
                    {% if watch.history_n >= 2 %}
-                    <a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
+                      {% if watch.fetch_processor == "image" or watch.fetch_processor == "rendered_webpage" %}
+                        <a href="{{ url_for('diff_image_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
+                      {% else %}
+                        <a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
+                      {% endif %}
+
                    {% else %}
                        {% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
-                            <a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
+
+                    {% if watch.fetch_processor == "image" or watch.fetch_processor == "rendered_webpage" %}
+                        <a href="{{ url_for('preview_image_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Preview</a>
+                      {% else %}
+                        <a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
+                      {% endif %}
                        {% endif %}
                    {% endif %}
                </td>
--- a/changedetectionio/tests/test_css_selector.py
+++ b/changedetectionio/tests/test_css_selector.py
@@ -47,7 +47,6 @@ def set_modified_response():

 # Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
 def test_css_filter_output():
-    from changedetectionio import fetch_site_status
    from inscriptis import get_text

    # Check text with sub-parts renders correctly
--- a/changedetectionio/tests/test_element_removal.py
+++ b/changedetectionio/tests/test_element_removal.py
@@ -71,7 +71,6 @@ def set_modified_response():


 def test_element_removal_output():
-    from changedetectionio import fetch_site_status
    from inscriptis import get_text

    # Check text with sub-parts renders correctly
--- a/changedetectionio/tests/test_ignore_regex_text.py
+++ b/changedetectionio/tests/test_ignore_regex_text.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python3

-import time
-from flask import url_for
 from . util import live_server_setup
 from changedetectionio import html_tools

@@ -11,7 +9,7 @@ def test_setup(live_server):
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_regex_text_func():
-    from changedetectionio import fetch_site_status
+    from ..fetch_processor import json_html_plaintext

    test_content = """
    but sometimes we want to remove the lines.
@@ -23,7 +21,7 @@ def test_strip_regex_text_func():

    ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]

-    fetcher = fetch_site_status.perform_site_check(datastore=False)
+    fetcher = json_html_plaintext.perform_site_check(datastore=False)
    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

    assert b"but 1 lines" in stripped_content
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@@ -11,7 +11,7 @@ def test_setup(live_server):
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_text_func():
-    from changedetectionio import fetch_site_status
+    from ..fetch_processor import json_html_plaintext

    test_content = """
    Some content
@@ -23,7 +23,9 @@ def test_strip_text_func():

    ignore_lines = ["sometimes"]

-    fetcher = fetch_site_status.perform_site_check(datastore=False)
+    from ..fetch_processor import json_html_plaintext
+
+    fetcher = json_html_plaintext.perform_site_check(datastore=False)
    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

    assert b"sometimes" not in stripped_content
--- a/changedetectionio/tests/util.py
+++ b/changedetectionio/tests/util.py
@@ -92,7 +92,6 @@ def wait_for_all_checks(client):
        if not b'Checking now' in res.data:
            break
        logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
-
        attempt += 1

 def live_server_setup(live_server):
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -120,10 +120,6 @@ class update_worker(threading.Thread):
                os.unlink(full_path)

    def run(self):
-        from changedetectionio import fetch_site_status
-
-        update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
-
        while not self.app.config.exit.is_set():

            try:
@@ -135,21 +131,34 @@ class update_worker(threading.Thread):
                self.current_uuid = uuid

                if uuid in list(self.datastore.data['watching'].keys()):
+                    update_handler = None  # Interface object
                    changed_detected = False
-                    contents = b''
-                    screenshot = False
-                    update_obj= {}
-                    xpath_data = False
+                    update_obj = {}
                    process_changedetection_results = True
-                    print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
+                    watch = self.datastore.data['watching'].get(uuid)
+                    print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, watch.get('url')))
                    now = time.time()

                    try:
-                        changed_detected, update_obj, contents = update_handler.run(uuid)
+                        update_handler = None
+
+                        if watch.get('fetch_processor') == 'image':
+                            from .fetch_processor import image as processor_image
+                            update_handler = processor_image.perform_site_check(datastore=self.datastore)
+                        elif watch.get('fetch_processor') == 'rendered_webpage':
+                            from .fetch_processor import image as processor_rendered_webpage
+                            update_handler = processor_rendered_webpage.perform_site_check(datastore=self.datastore)
+                        else:
+                            # Anything else for now will be `json_html_plaintext`
+                            from .fetch_processor import json_html_plaintext as processor_json_html_plaintext
+                            update_handler = processor_json_html_plaintext.perform_site_check(datastore=self.datastore)
+
+                        changed_detected, update_obj = update_handler.run(uuid)
+
                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
                        # We then convert/.decode('utf-8') for the notification etc
-                        if not isinstance(contents, (bytes, bytearray)):
+                        if not isinstance(update_handler.contents, (bytes, bytearray)):
                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
                    except PermissionError as e:
                        self.app.logger.error("File permission error updating", uuid, str(e))
@@ -256,13 +265,12 @@ class update_worker(threading.Thread):
                    # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                    if process_changedetection_results:
                        try:
-                            watch = self.datastore.data['watching'][uuid]
-                            fname = "" # Saved history text filename
+                            watch = self.datastore.data['watching'].get(uuid)

                            # For the FIRST time we check a site, or a change detected, save the snapshot.
                            if changed_detected or not watch['last_checked']:
                                # A change was detected
-                                watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
+                                watch.save_history_artifact(contents=update_handler.contents, timestamp=str(round(time.time())))

                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

--- a/requirements.txt
+++ b/requirements.txt
@@ -46,3 +46,11 @@ selenium ~= 4.1.0
 werkzeug ~= 2.0.0

 # playwright is installed at Dockerfile build time because it's not available on all platforms
+
+
+imagehash ~= 4.3.0
+pillow
+scikit-image
+imutils
+opencv-python
+python-magic
Author	SHA1	Message	Date
dgtlmoon	00ac8645f7	Merge branch 'master' into fetchers-abstract-graphic-compare	2022-10-11 15:12:16 +02:00
dgtlmoon	bef4b40d7f	cmake?	2022-10-11 15:03:56 +02:00
dgtlmoon	6dd26226bc	Merge branch 'adding-test-webdriver_js_execute_code' into fetchers-abstract-graphic-compare	2022-10-11 14:26:12 +02:00
dgtlmoon	779e9c1780	Test that the 'execute JS before' works	2022-10-11 14:21:52 +02:00
dgtlmoon	d9ed04ee56	fix bad test	2022-10-11 13:52:48 +02:00
dgtlmoon	0f63dca9f7	Merge branch 'master' into fetchers-abstract-graphic-compare	2022-10-11 13:06:49 +02:00
dgtlmoon	da7f613e9f	tidyups	2022-09-19 17:34:56 +02:00
dgtlmoon	bb03879aad	tweaks for merge	2022-09-19 14:24:40 +02:00
dgtlmoon	d23a39a7d8	Merge branch 'master' into fetchers-abstract-graphic-compare	2022-09-19 14:09:50 +02:00
dgtlmoon	338b4dacd0	WIP	2022-09-13 15:09:53 +02:00
dgtlmoon	c0fcae0076	WIP	2022-09-13 13:44:11 +02:00
dgtlmoon	0e0bd93234	WIP	2022-09-13 09:52:29 +02:00
dgtlmoon	c5b0c19836	WIP	2022-09-12 16:48:53 +02:00
dgtlmoon	c00459e18f	WIP slider	2022-09-12 16:12:35 +02:00
dgtlmoon	41db6652fe	WIP	2022-09-12 16:04:58 +02:00
dgtlmoon	20869a13b3	tweaks to saving	2022-09-12 12:45:29 +02:00
dgtlmoon	97c2cd633d	WIP	2022-09-12 12:13:03 +02:00
dgtlmoon	9244e2fb9c	sorting lines	2022-09-12 10:06:45 +02:00
dgtlmoon	a86cbd8b7a	Merge branch 'master' into fetchers-abstract	2022-09-12 10:04:23 +02:00
dgtlmoon	f35d91e4fb	Cleaner history suffix handling	2022-08-31 19:22:26 +02:00
dgtlmoon	687cf9beb4	More tidyup	2022-08-31 18:11:18 +02:00
dgtlmoon	f59b198ffb	fetch right class	2022-08-31 18:02:22 +02:00
dgtlmoon	518bdf5a3f	move this	2022-08-31 18:00:53 +02:00
dgtlmoon	dcd09359eb	cleanup	2022-08-31 17:57:02 +02:00
dgtlmoon	425f8ea632	Abstract out the fetch handlers for different fetch types	2022-08-31 17:52:32 +02:00