WIP

2026-04-30 23:00:30 +00:00 · 2023-04-08 21:14:03 +02:00 · 2023-04-08 20:35:13 +02:00 · 2023-04-08 20:12:30 +02:00 · 2023-04-08 18:49:27 +02:00 · 2023-04-06 16:12:18 +02:00
72 changed files with 2125 additions and 1056 deletions
@@ -98,7 +98,8 @@ jobs:
          platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache
-          provenance: false
+# `provenance: false` is commented out below, so the action's default provenance setting applies
+#          provenance: false

      # A new tagged release is required, which builds :tag and :latest
      - name: Build and push :tag
@@ -117,7 +118,8 @@ jobs:
          platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache
-          provenance: false
+# `provenance: false` is commented out below, so the action's default provenance setting applies
+#          provenance: false

      - name: Image digest
        run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}
@@ -1,38 +0,0 @@
-name: PyPi Test and Push tagged release
-
-# Triggers the workflow on push or pull request events
-on:
-  workflow_run:
-    workflows: ["ChangeDetection.io Test"]
-    tags: '*.*'
-    types: [completed]
-
-
-jobs:
-  test-build:
-    runs-on: ubuntu-latest
-    steps:
-
-      - uses: actions/checkout@v2
-      - name: Set up Python 3.9
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.9
-
-
-      - name: Test that pip builds without error
-        run: |
-          pip3 --version
-          python3 -m pip install wheel
-          python3 setup.py bdist_wheel
-          python3 -m pip install dist/changedetection.io-*-none-any.whl --force
-          changedetection.io -d /tmp -p 10000 &
-          sleep 3
-          curl http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
-          killall -9 changedetection.io
-
-      # https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
-      # https://github.com/docker/buildx/issues/59 ? Needs to be one platform?
-
-      # https://github.com/docker/buildx/issues/495#issuecomment-918925854
-#if: ${{ github.event_name == 'release'}}
@@ -50,10 +50,13 @@ jobs:
        run: |
          
          # Selenium fetch
-          docker run -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
+          docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
          
          # Playwright/Browserless fetch
-          docker run -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
+          docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
+          
+          # restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it
+          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'

      - name: Test proxy interaction
        run: |
@@ -0,0 +1,36 @@
+name: ChangeDetection.io PIP package test
+
+# Triggers the workflow on push or pull request events.
+# NOTE(review): an earlier comment here said this documented trigger form "doesnt work" - confirm it fires as expected.
+on: [push, pull_request]
+
+  # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
+  # @todo: some kind of path filter for requirements.txt and Dockerfile
+jobs:
+  test-pip-build-basics:
+    runs-on: ubuntu-latest
+    steps:
+        - uses: actions/checkout@v2
+
+        - name: Set up Python 3.9
+          uses: actions/setup-python@v2
+          with:
+            # Quoted so YAML keeps the version a string instead of parsing it as a float
+            python-version: '3.9'
+
+
+        - name: Test that the basic pip built package runs without error
+          run: |
+            set -e
+            mkdir dist
+            pip3 install wheel
+            python3 setup.py bdist_wheel
+            pip3 install -r requirements.txt
+            # Remove the checked-out sources so that only the installed wheel can provide the application
+            rm ./changedetection.py
+            rm -rf changedetectionio
+
+            pip3 install dist/changedetection.io*.whl
+            changedetection.io -d /tmp -p 10000 &
+            sleep 3
+            # --fail makes curl exit non-zero on an HTTP error status, so `set -e` fails the step
+            curl --fail http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
+            killall -9 changedetection.io
@@ -1,6 +1,7 @@
 recursive-include changedetectionio/api *
 recursive-include changedetectionio/blueprint *
 recursive-include changedetectionio/model *
+recursive-include changedetectionio/processors *
 recursive-include changedetectionio/res *
 recursive-include changedetectionio/static *
 recursive-include changedetectionio/templates *
@@ -1,4 +1,4 @@
-## Web Site Change Detection, Monitoring and Notification.
+## Web Site Change Detection, Restock Monitoring and Notifications.

 **_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**

@@ -49,6 +49,7 @@ Requires Playwright to be enabled.
 - Governmental department updates (changes are often only on their websites)
 - New software releases, security advisories when you're not on their mailing list.
 - Festivals with changes
+- Discogs restock alerts and monitoring
 - Realestate listing changes
 - Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
 - COVID related news from government websites
@@ -64,6 +65,7 @@ Requires Playwright to be enabled.
 - Get notified when certain keywords appear in Twitter search results
 - Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
 - Get alerts when new job positions are open on Bamboo HR and other job platforms
+- Website defacement monitoring

 _Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!</a>_

@@ -101,6 +103,8 @@ $ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/d

 `:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch.

+An alternative Docker image repository is available on the GitHub Container Registry (GHCR) - [ghcr.io/dgtlmoon/changedetection.io](https://ghcr.io/dgtlmoon/changedetection.io)
+
 ### Windows

 See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
@@ -36,7 +36,7 @@ from flask import (
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.40.3'
+__version__ = '0.41.1'

 datastore = None

@@ -64,6 +64,9 @@ app.config.exit = Event()

 app.config['NEW_VERSION_AVAILABLE'] = False

+if os.getenv('FLASK_SERVER_NAME'):
+    app.config['SERVER_NAME'] = os.getenv('FLASK_SERVER_NAME')
+
 #app.config["EXPLAIN_TEMPLATE_LOADING"] = True

 # Disables caching of the templates
@@ -337,8 +340,6 @@ def changedetection_app(config=None, datastore_o=None):
            if len(dates) < 2:
                continue

-            prev_fname = watch.history[dates[-2]]
-
            if not watch.viewed:
                # Re #239 - GUID needs to be individual for each event
                # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
@@ -359,9 +360,12 @@ def changedetection_app(config=None, datastore_o=None):

                watch_title = watch.get('title') if watch.get('title') else watch.get('url')
                fe.title(title=watch_title)
-                latest_fname = watch.history[dates[-1]]

-                html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
+                html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
+                                             newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
+                                             include_equal=False,
+                                             line_feed_sep="<br>")
+
                fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
                           type='CDATA')

@@ -511,8 +515,9 @@ def changedetection_app(config=None, datastore_o=None):
    # https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?

    def edit_page(uuid):
-        from changedetectionio import forms
-        from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
+        from . import forms
+        from .blueprint.browser_steps.browser_steps import browser_step_ui_config
+        from . import processors

        using_default_check_time = True
        # More for testing, possible to return the first/only
@@ -527,6 +532,15 @@ def changedetection_app(config=None, datastore_o=None):
            flash("No watch with the UUID %s found." % (uuid), "error")
            return redirect(url_for('index'))

+        # Optional ?switch_processor=<name> query argument: change this watch's processor,
+        # but only to a name that processors.available_processors() actually lists.
+        switch_processor = request.args.get('switch_processor')
+        if switch_processor:
+            for p in processors.available_processors():
+                if p[0] == switch_processor:
+                    datastore.data['watching'][uuid]['processor'] = switch_processor
+                    flash(f"Switched to mode - {p[1]}.")
+                    # History is cleared when the processor changes
+                    datastore.clear_watch_history(uuid)
+                    # The redirect response has to be returned, otherwise it is built and silently discarded
+                    return redirect(url_for('edit_page', uuid=uuid))
+
        # be sure we update with a copy instead of accidently editing the live object by reference
        default = deepcopy(datastore.data['watching'][uuid])

@@ -587,6 +601,16 @@ def changedetection_app(config=None, datastore_o=None):
            if datastore.proxy_list is not None and form.data['proxy'] == '':
                extra_update_obj['proxy'] = None

+            # Unsetting all filter_text methods should make it go back to default
+            # This particularly affects tests running
+            if 'filter_text_added' in form.data and not form.data.get('filter_text_added') \
+                    and 'filter_text_replaced' in form.data and not form.data.get('filter_text_replaced') \
+                    and 'filter_text_removed' in form.data and not form.data.get('filter_text_removed'):
+                extra_update_obj['filter_text_added'] = True
+                extra_update_obj['filter_text_replaced'] = True
+                extra_update_obj['filter_text_removed'] = True
+
+
            datastore.data['watching'][uuid].update(form.data)
            datastore.data['watching'][uuid].update(extra_update_obj)

@@ -633,6 +657,7 @@ def changedetection_app(config=None, datastore_o=None):
            visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver

            output = render_template("edit.html",
+                                     available_processors=processors.available_processors(),
                                     browser_steps_config=browser_step_ui_config,
                                     current_base_url=datastore.data['settings']['application']['base_url'],
                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
@@ -654,7 +679,7 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/settings", methods=['GET', "POST"])
    @login_optionally_required
    def settings_page():
-        from changedetectionio import content_fetcher, forms
+        from . import forms

        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:
@@ -735,6 +760,8 @@ def changedetection_app(config=None, datastore_o=None):
    @login_optionally_required
    def import_page():
        remaining_urls = []
+        from . import forms
+
        if request.method == 'POST':
            from .importer import import_url_list, import_distill_io_json

@@ -742,7 +769,7 @@ def changedetection_app(config=None, datastore_o=None):
            if request.values.get('urls') and len(request.values.get('urls').strip()):
                # Import and push into the queue for immediate update check
                importer = import_url_list()
-                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
+                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor'))
                for uuid in importer.new_uuids:
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

@@ -760,9 +787,12 @@ def changedetection_app(config=None, datastore_o=None):
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))


-
+        form = forms.importForm(formdata=request.form if request.method == 'POST' else None,
+#                               data=default,
+                               )
        # Could be some remaining, or we could be on GET
        output = render_template("import.html",
+                                 form=form,
                                 import_url_list_remaining="\n".join(remaining_urls),
                                 original_distill_json=''
                                 )
@@ -828,28 +858,22 @@ def changedetection_app(config=None, datastore_o=None):
        # Save the current newest history as the most recently viewed
        datastore.set_last_viewed(uuid, time.time())

-        newest_file = history[dates[-1]]
-
        # Read as binary and force decode as UTF-8
        # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
        try:
-            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
-                newest_version_file_contents = f.read()
+            newest_version_file_contents = watch.get_history_snapshot(dates[-1])
        except Exception as e:
-            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
+            newest_version_file_contents = "Unable to read {}.\n".format(dates[-1])

        previous_version = request.args.get('previous_version')
-        try:
-            previous_file = history[previous_version]
-        except KeyError:
-            # Not present, use a default value, the second one in the sorted list.
-            previous_file = history[dates[-2]]
+        previous_timestamp = dates[-2]
+        if previous_version:
+            previous_timestamp = previous_version

        try:
-            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
-                previous_version_file_contents = f.read()
+            previous_version_file_contents = watch.get_history_snapshot(previous_timestamp)
        except Exception as e:
-            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
+            previous_version_file_contents = "Unable to read {}.\n".format(previous_timestamp)


        screenshot_url = watch.get_screenshot()
@@ -929,37 +953,35 @@ def changedetection_app(config=None, datastore_o=None):
            return output

        timestamp = list(watch.history.keys())[-1]
-        filename = watch.history[timestamp]
        try:
-            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
-                tmp = f.readlines()
+            tmp = watch.get_history_snapshot(timestamp).splitlines()

-                # Get what needs to be highlighted
-                ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
+            # Get what needs to be highlighted
+            ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']

-                # .readlines will keep the \n, but we will parse it here again, in the future tidy this up
-                ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
-                                                                    wordlist=ignore_rules,
-                                                                    mode='line numbers'
-                                                                    )
+            # splitlines() drops the line endings, so the lines are re-joined with \n before being parsed again here; in the future tidy this up
+            ignored_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
+                                                                wordlist=ignore_rules,
+                                                                mode='line numbers'
+                                                                )

-                trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
-                                                                    wordlist=watch['trigger_text'],
-                                                                    mode='line numbers'
-                                                                    )
-                # Prepare the classes and lines used in the template
-                i=0
-                for l in tmp:
-                    classes=[]
-                    i+=1
-                    if i in ignored_line_numbers:
-                        classes.append('ignored')
-                    if i in trigger_line_numbers:
-                        classes.append('triggered')
-                    content.append({'line': l, 'classes': ' '.join(classes)})
+            trigger_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
+                                                                wordlist=watch['trigger_text'],
+                                                                mode='line numbers'
+                                                                )
+            # Prepare the classes and lines used in the template
+            i=0
+            for l in tmp:
+                classes=[]
+                i+=1
+                if i in ignored_line_numbers:
+                    classes.append('ignored')
+                if i in trigger_line_numbers:
+                    classes.append('triggered')
+                content.append({'line': l, 'classes': ' '.join(classes)})

        except Exception as e:
-            content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''})
+            content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})

        output = render_template("preview.html",
                                 content=content,
@@ -1126,7 +1148,8 @@ def changedetection_app(config=None, datastore_o=None):
            return redirect(url_for('index'))

        add_paused = request.form.get('edit_and_watch_submit_button') != None
-        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
+        processor = request.form.get('processor', 'text_json_diff')
+        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused, 'processor': processor})

        if new_uuid:
            if add_paused:
@@ -1168,7 +1191,8 @@ def changedetection_app(config=None, datastore_o=None):

        new_uuid = datastore.clone(uuid)
        if new_uuid:
-            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+            if not datastore.data['watching'].get(uuid).get('paused'):
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
            flash('Cloned.')

        return redirect(url_for('index'))
@@ -179,9 +179,7 @@ class WatchSingleHistory(Resource):
        if timestamp == 'latest':
            timestamp = list(watch.history.keys())[-1]

-        # @todo - Check for UTF-8 compatability
-        with open(watch.history[timestamp], 'r') as f:
-            content = f.read()
+        content = watch.get_history_snapshot(timestamp)

        response = make_response(content, 200)
        response.mimetype = "text/plain"
@@ -106,8 +106,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):

            if step_operation == 'Goto site':
                step_operation = 'goto_url'
-                step_optional_value = None
-                step_selector = datastore.data['watching'][uuid].get('url')
+                step_optional_value = datastore.data['watching'][uuid].get('url')
+                step_selector = None

            # @todo try.. accept.. nice errors not popups..
            try:
@@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0',
                          'Execute JS': '0 1',
 #                          'Extract text and use as filter': '1 0',
                          'Goto site': '0 0',
+                          'Goto URL': '0 1',
                          'Press Enter': '0 0',
                          'Select by label': '1 1',
                          'Scroll down': '0 0',
@@ -54,7 +55,7 @@ class steppable_browser_interface():

        print("> action calling", call_action_name)
        # https://playwright.dev/python/docs/selectors#xpath-selectors
-        if selector.startswith('/') and not selector.startswith('//'):
+        if selector and selector.startswith('/') and not selector.startswith('//'):
            selector = "xpath=" + selector

        action_handler = getattr(self, "action_" + call_action_name)
@@ -73,10 +74,10 @@ class steppable_browser_interface():
        self.page.wait_for_timeout(3 * 1000)
        print("Call action done in", time.time() - now)

-    def action_goto_url(self, url, optional_value):
+    def action_goto_url(self, selector, value):
        # self.page.set_viewport_size({"width": 1280, "height": 5000})
        now = time.time()
-        response = self.page.goto(url, timeout=0, wait_until='commit')
+        response = self.page.goto(value, timeout=0, wait_until='commit')

        # Wait_until = commit
        # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
@@ -236,7 +237,7 @@ class browsersteps_live_ui(steppable_browser_interface):
    def get_current_state(self):
        """Return the screenshot and interactive elements mapping, generally always called after action_()"""
        from pkg_resources import resource_string
-        xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
+        xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
        now = time.time()
        self.page.wait_for_timeout(1 * 1000)

@@ -271,8 +272,8 @@ class browsersteps_live_ui(steppable_browser_interface):
        self.page.evaluate("var include_filters=''")
        from pkg_resources import resource_string
        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
-        from changedetectionio.content_fetcher import visualselector_xpath_selectors
+        xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
+        from changedetectionio.fetchers import visualselector_xpath_selectors
        xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
        xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
@@ -13,7 +13,7 @@ import signal
 import socket
 import sys

-from . import store, changedetection_app, content_fetcher
+from . import store, changedetection_app
 from . import __version__

 # Only global so we can access it in the signal handler
@@ -1,594 +0,0 @@
-import hashlib
-from abc import abstractmethod
-import chardet
-import json
-import logging
-import os
-import requests
-import sys
-import time
-
-visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'
-
-class Non200ErrorCodeReceived(Exception):
-    def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        self.xpath_data = xpath_data
-        self.page_text = None
-
-        if page_html:
-            from changedetectionio import html_tools
-            self.page_text = html_tools.html_to_text(page_html)
-        return
-
-class checksumFromPreviousCheckWasTheSame(Exception):
-    def __init__(self):
-        return
-
-class JSActionExceptions(Exception):
-    def __init__(self, status_code, url, screenshot, message=''):
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        self.message = message
-        return
-
-class BrowserStepsStepTimout(Exception):
-    def __init__(self, step_n):
-        self.step_n = step_n
-        return
-
-
-class PageUnloadable(Exception):
-    def __init__(self, status_code, url, message, screenshot=False):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        self.message = message
-        return
-
-class EmptyReply(Exception):
-    def __init__(self, status_code, url, screenshot=None):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        return
-
-class ScreenshotUnavailable(Exception):
-    def __init__(self, status_code, url, page_html=None):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        if page_html:
-            from html_tools import html_to_text
-            self.page_text = html_to_text(page_html)
-        return
-
-class ReplyWithContentButNoText(Exception):
-    def __init__(self, status_code, url, screenshot=None):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        return
-
-class Fetcher():
-    error = None
-    status_code = None
-    content = None
-    headers = None
-    browser_steps = None
-    browser_steps_screenshot_path = None
-
-    fetcher_description = "No description"
-    webdriver_js_execute_code = None
-    xpath_element_js = ""
-
-    xpath_data = None
-
-    # Will be needed in the future by the VisualSelector, always get this where possible.
-    screenshot = False
-    system_http_proxy = os.getenv('HTTP_PROXY')
-    system_https_proxy = os.getenv('HTTPS_PROXY')
-
-    # Time ONTOP of the system defined env minimum time
-    render_extract_delay = 0
-
-    def __init__(self):
-        from pkg_resources import resource_string
-        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
-
-
-    @abstractmethod
-    def get_error(self):
-        return self.error
-
-    @abstractmethod
-    def run(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
-        # Should set self.error, self.status_code and self.content
-        pass
-
-    @abstractmethod
-    def quit(self):
-        return
-
-    @abstractmethod
-    def get_last_status_code(self):
-        return self.status_code
-
-    @abstractmethod
-    def screenshot_step(self, step_n):
-        return None
-
-    @abstractmethod
-    # Return true/false if this checker is ready to run, in the case it needs todo some special config check etc
-    def is_ready(self):
-        return True
-
-    def iterate_browser_steps(self):
-        from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
-        from playwright._impl._api_types import TimeoutError
-        from jinja2 import Environment
-        jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
-
-        step_n = 0
-
-        if self.browser_steps is not None and len(self.browser_steps):
-            interface = steppable_browser_interface()
-            interface.page = self.page
-
-            valid_steps = filter(lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), self.browser_steps)
-
-            for step in valid_steps:
-                step_n += 1
-                print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
-                self.screenshot_step("before-"+str(step_n))
-                self.save_step_html("before-"+str(step_n))
-                try:
-                    optional_value = step['optional_value']
-                    selector = step['selector']
-                    # Support for jinja2 template in step values, with date module added
-                    if '{%' in step['optional_value'] or '{{' in step['optional_value']:
-                        optional_value = str(jinja2_env.from_string(step['optional_value']).render())
-                    if '{%' in step['selector'] or '{{' in step['selector']:
-                        selector = str(jinja2_env.from_string(step['selector']).render())
-
-                    getattr(interface, "call_action")(action_name=step['operation'],
-                                                      selector=selector,
-                                                      optional_value=optional_value)
-                    self.screenshot_step(step_n)
-                    self.save_step_html(step_n)
-                except TimeoutError:
-                    # Stop processing here
-                    raise BrowserStepsStepTimout(step_n=step_n)
-
-
-
-    # It's always good to reset these
-    def delete_browser_steps_screenshots(self):
-        import glob
-        if self.browser_steps_screenshot_path is not None:
-            dest = os.path.join(self.browser_steps_screenshot_path, 'step_*.jpeg')
-            files = glob.glob(dest)
-            for f in files:
-                os.unlink(f)
-
-#   Maybe for the future, each fetcher provides its own diff output, could be used for text, image
-#   the current one would return javascript output (as we use JS to generate the diff)
-#
-def available_fetchers():
-    # See the if statement at the bottom of this file for how we switch between playwright and webdriver
-    import inspect
-    p = []
-    for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
-        if inspect.isclass(obj):
-            # @todo html_ is maybe better as fetcher_ or something
-            # In this case, make sure to edit the default one in store.py and fetch_site_status.py
-            if name.startswith('html_'):
-                t = tuple([name, obj.fetcher_description])
-                p.append(t)
-
-    return p
-
-class base_html_playwright(Fetcher):
-    fetcher_description = "Playwright {}/Javascript".format(
-        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
-    )
-    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
-        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
-
-    browser_type = ''
-    command_executor = ''
-
-    # Configs for Proxy setup
-    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
-    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
-
-    proxy = None
-
-    def __init__(self, proxy_override=None):
-        super().__init__()
-        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
-        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
-        self.command_executor = os.getenv(
-            "PLAYWRIGHT_DRIVER_URL",
-            'ws://playwright-chrome:3000'
-        ).strip('"')
-
-        # If any proxy settings are enabled, then we should setup the proxy object
-        proxy_args = {}
-        for k in self.playwright_proxy_settings_mappings:
-            v = os.getenv('playwright_proxy_' + k, False)
-            if v:
-                proxy_args[k] = v.strip('"')
-
-        if proxy_args:
-            self.proxy = proxy_args
-
-        # allow per-watch proxy selection override
-        if proxy_override:
-            self.proxy = {'server': proxy_override}
-
-        if self.proxy:
-            # Playwright needs separate username and password values
-            from urllib.parse import urlparse
-            parsed = urlparse(self.proxy.get('server'))
-            if parsed.username:
-                self.proxy['username'] = parsed.username
-                self.proxy['password'] = parsed.password
-
-    def screenshot_step(self, step_n=''):
-        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
-
-        if self.browser_steps_screenshot_path is not None:
-            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
-            logging.debug("Saving step screenshot to {}".format(destination))
-            with open(destination, 'wb') as f:
-                f.write(screenshot)
-
-    def save_step_html(self, step_n):
-        content = self.page.content()
-        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
-        logging.debug("Saving step HTML to {}".format(destination))
-        with open(destination, 'w') as f:
-            f.write(content)
-
-    def run(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
-
-        from playwright.sync_api import sync_playwright
-        import playwright._impl._api_types
-
-        self.delete_browser_steps_screenshots()
-        response = None
-        with sync_playwright() as p:
-            browser_type = getattr(p, self.browser_type)
-
-            # Seemed to cause a connection Exception even tho I can see it connect
-            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
-            # 60,000 connection timeout only
-            browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)
-
-            # Set user agent to prevent Cloudflare from blocking the browser
-            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
-            context = browser.new_context(
-                user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
-                proxy=self.proxy,
-                # This is needed to enable JavaScript execution on GitHub and others
-                bypass_csp=True,
-                # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
-                service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
-                # Should never be needed
-                accept_downloads=False
-            )
-
-            self.page = context.new_page()
-            if len(request_headers):
-                context.set_extra_http_headers(request_headers)
-
-                self.page.set_default_navigation_timeout(90000)
-                self.page.set_default_timeout(90000)
-
-                # Listen for all console events and handle errors
-                self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
-
-            # Goto page
-            try:
-                # Wait_until = commit
-                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
-                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
-                # This seemed to solve nearly all 'TimeoutErrors'
-                response = self.page.goto(url, wait_until='commit')
-            except playwright._impl._api_types.Error as e:
-                # Retry once - https://github.com/browserless/chrome/issues/2485
-                # Sometimes errors related to invalid cert's and other can be random
-                print ("Content Fetcher > retrying request got error - ", str(e))
-                time.sleep(1)
-                response = self.page.goto(url, wait_until='commit')
-
-            except Exception as e:
-                print ("Content Fetcher > Other exception when page.goto", str(e))
-                context.close()
-                browser.close()
-                raise PageUnloadable(url=url, status_code=None, message=str(e))
-
-            # Execute any browser steps
-            try:
-                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
-                self.page.wait_for_timeout(extra_wait * 1000)
-
-                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
-                    self.page.evaluate(self.webdriver_js_execute_code)
-
-            except playwright._impl._api_types.TimeoutError as e:
-                context.close()
-                browser.close()
-                # This can be ok, we will try to grab what we could retrieve
-                pass
-            except Exception as e:
-                print ("Content Fetcher > Other exception when executing custom JS code", str(e))
-                context.close()
-                browser.close()
-                raise PageUnloadable(url=url, status_code=None, message=str(e))
-
-            if response is None:
-                context.close()
-                browser.close()
-                print ("Content Fetcher > Response object was none")
-                raise EmptyReply(url=url, status_code=None)
-
-            # Run Browser Steps here
-            self.iterate_browser_steps()
-
-            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
-            time.sleep(extra_wait)
-
-            self.content = self.page.content()
-            self.status_code = response.status
-            if len(self.page.content().strip()) == 0:
-                context.close()
-                browser.close()
-                print ("Content Fetcher > Content was empty")
-                raise EmptyReply(url=url, status_code=response.status)
-
-            self.status_code = response.status
-            self.content = self.page.content()
-            self.headers = response.all_headers()
-
-            # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
-            if current_include_filters is not None:
-                self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
-            else:
-                self.page.evaluate("var include_filters=''")
-
-            self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
-
-            # Bug 3 in Playwright screenshot handling
-            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
-            # JPEG is better here because the screenshots can be very very large
-
-            # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
-            # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
-            # acceptable screenshot quality here
-            try:
-                # The actual screenshot
-                self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
-            except Exception as e:
-                context.close()
-                browser.close()
-                raise ScreenshotUnavailable(url=url, status_code=None)
-
-            context.close()
-            browser.close()
-
-class base_html_webdriver(Fetcher):
-    if os.getenv("WEBDRIVER_URL"):
-        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
-    else:
-        fetcher_description = "WebDriver Chrome/Javascript"
-
-    command_executor = ''
-
-    # Configs for Proxy setup
-    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
-    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
-                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
-                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
-    proxy = None
-
-    def __init__(self, proxy_override=None):
-        super().__init__()
-        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
-
-        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
-        self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
-
-        # If any proxy settings are enabled, then we should setup the proxy object
-        proxy_args = {}
-        for k in self.selenium_proxy_settings_mappings:
-            v = os.getenv('webdriver_' + k, False)
-            if v:
-                proxy_args[k] = v.strip('"')
-
-        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
-        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
-            proxy_args['httpProxy'] = self.system_http_proxy
-        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
-            proxy_args['httpsProxy'] = self.system_https_proxy
-
-        # Allows override the proxy on a per-request basis
-        if proxy_override is not None:
-            proxy_args['httpProxy'] = proxy_override
-
-        if proxy_args:
-            self.proxy = SeleniumProxy(raw=proxy_args)
-
-    def run(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
-
-        from selenium import webdriver
-        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
-        from selenium.common.exceptions import WebDriverException
-        # request_body, request_method unused for now, until some magic in the future happens.
-
-        # check env for WEBDRIVER_URL
-        self.driver = webdriver.Remote(
-            command_executor=self.command_executor,
-            desired_capabilities=DesiredCapabilities.CHROME,
-            proxy=self.proxy)
-
-        try:
-            self.driver.get(url)
-        except WebDriverException as e:
-            # Be sure we close the session window
-            self.quit()
-            raise
-
-        self.driver.set_window_size(1280, 1024)
-        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
-
-        if self.webdriver_js_execute_code is not None:
-            self.driver.execute_script(self.webdriver_js_execute_code)
-            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
-            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
-
-        # @todo - how to check this? is it possible?
-        self.status_code = 200
-        # @todo somehow we should try to get this working for WebDriver
-        # raise EmptyReply(url=url, status_code=r.status_code)
-
-        # @todo - dom wait loaded?
-        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
-        self.content = self.driver.page_source
-        self.headers = {}
-
-        self.screenshot = self.driver.get_screenshot_as_png()
-
-    # Does the connection to the webdriver work? run a test connection.
-    def is_ready(self):
-        from selenium import webdriver
-        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
-
-        self.driver = webdriver.Remote(
-            command_executor=self.command_executor,
-            desired_capabilities=DesiredCapabilities.CHROME)
-
-        # driver.quit() seems to cause better exceptions
-        self.quit()
-        return True
-
-    def quit(self):
-        if self.driver:
-            try:
-                self.driver.quit()
-            except Exception as e:
-                print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
-
-
-# "html_requests" is listed as the default fetcher in store.py!
-class html_requests(Fetcher):
-    fetcher_description = "Basic fast Plaintext/HTTP Client"
-
-    def __init__(self, proxy_override=None):
-        self.proxy_override = proxy_override
-
-    def run(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
-
-        # Make requests use a more modern looking user-agent
-        if not 'User-Agent' in request_headers:
-            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
-                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
-
-        proxies = {}
-
-        # Allows override the proxy on a per-request basis
-        if self.proxy_override:
-            proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override}
-        else:
-            if self.system_http_proxy:
-                proxies['http'] = self.system_http_proxy
-            if self.system_https_proxy:
-                proxies['https'] = self.system_https_proxy
-
-        r = requests.request(method=request_method,
-                             data=request_body,
-                             url=url,
-                             headers=request_headers,
-                             timeout=timeout,
-                             proxies=proxies,
-                             verify=False)
-
-        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
-        # For example - some sites don't tell us it's utf-8, but return utf-8 content
-        # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
-        # https://github.com/psf/requests/issues/1604 good info about requests encoding detection
-        if not is_binary:
-            # Don't run this for PDF (and requests identified as binary) takes a _long_ time
-            if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
-                encoding = chardet.detect(r.content)['encoding']
-                if encoding:
-                    r.encoding = encoding
-
-        if not r.content or not len(r.content):
-            raise EmptyReply(url=url, status_code=r.status_code)
-
-        # @todo test this
-        # @todo maybe you really want to test zero-byte return pages?
-        if r.status_code != 200 and not ignore_status_codes:
-            # maybe check with content works?
-            raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)
-
-        self.status_code = r.status_code
-        if is_binary:
-            # Binary files just return their checksum until we add something smarter
-            self.content = hashlib.md5(r.content).hexdigest()
-        else:
-            self.content = r.text
-
-        self.headers = r.headers
-        self.raw_content = r.content
-
-
-# Decide which is the 'real' HTML webdriver, this is more a system wide config
-# rather than site-specific.
-use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
-if use_playwright_as_chrome_fetcher:
-    html_webdriver = base_html_playwright
-else:
-    html_webdriver = base_html_webdriver
@@ -10,7 +10,7 @@ def same_slicer(l, a, b):
        return l[a:b]

 # like .compare but a little different output
-def customSequenceMatcher(before, after, include_equal=False):
+def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True, include_replaced=True, include_change_type_prefix=True):
    cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)

    # @todo Line-by-line mode instead of buncghed, including `after` that is not in `before` (maybe unset?)
@@ -18,34 +18,39 @@ def customSequenceMatcher(before, after, include_equal=False):
        if include_equal and tag == 'equal':
            g = before[alo:ahi]
            yield g
-        elif tag == 'delete':
-            g = ["(removed) " + i for i in same_slicer(before, alo, ahi)]
+        elif include_removed and tag == 'delete':
+            row_prefix = "(removed) " if include_change_type_prefix else ''
+            g = [ row_prefix + i for i in same_slicer(before, alo, ahi)]
            yield g
-        elif tag == 'replace':
-            g = ["(changed) " + i for i in same_slicer(before, alo, ahi)]
-            g += ["(into   ) " + i for i in same_slicer(after, blo, bhi)]
+        elif include_replaced and tag == 'replace':
+            row_prefix = "(changed) " if include_change_type_prefix else ''
+            g = [row_prefix + i for i in same_slicer(before, alo, ahi)]
+            row_prefix = "(into) " if include_change_type_prefix else ''
+            g += [row_prefix + i for i in same_slicer(after, blo, bhi)]
            yield g
-        elif tag == 'insert':
-            g = ["(added  ) " + i for i in same_slicer(after, blo, bhi)]
+        elif include_added and tag == 'insert':
+            row_prefix = "(added) " if include_change_type_prefix else ''
+            g = [row_prefix + i for i in same_slicer(after, blo, bhi)]
            yield g

 # only_differences - only return info about the differences, no context
-# line_feed_sep could be "<br/>" or "<li>" or "\n" etc
-def render_diff(previous_file, newest_file, include_equal=False, line_feed_sep="\n"):
-    with open(newest_file, 'r') as f:
-        newest_version_file_contents = f.read()
-        newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+# line_feed_sep could be "<br>" or "<li>" or "\n" etc
+def render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=True, include_replaced=True, line_feed_sep="\n", include_change_type_prefix=True):

-    if previous_file:
-        with open(previous_file, 'r') as f:
-            previous_version_file_contents = f.read()
+    newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+
+    if previous_version_file_contents:
            previous_version_file_contents = [line.rstrip() for line in previous_version_file_contents.splitlines()]
    else:
        previous_version_file_contents = ""

-    rendered_diff = customSequenceMatcher(previous_version_file_contents,
-                                          newest_version_file_contents,
-                                          include_equal)
+    rendered_diff = customSequenceMatcher(before=previous_version_file_contents,
+                                          after=newest_version_file_contents,
+                                          include_equal=include_equal,
+                                          include_removed=include_removed,
+                                          include_added=include_added,
+                                          include_replaced=include_replaced,
+                                          include_change_type_prefix=include_change_type_prefix)

    # Recursively join lists
    f = lambda L: line_feed_sep.join([f(x) if type(x) is list else x for x in L])
@@ -0,0 +1,150 @@
+from abc import abstractmethod
+import os
+from . import exceptions
+
+# Comma-separated list of element selectors; the fetchers substitute it for the
+# '%ELEMENTS%' placeholder in the xpath_element_js script before evaluating it.
+visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'
+
+
+class Fetcher():
+    """Base class shared by the content fetchers.
+
+    The class-level attributes below are the defaults for the state a fetch
+    consumes or produces; run() is expected to set self.error,
+    self.status_code and self.content.
+
+    NOTE(review): several methods are decorated with @abstractmethod, but this
+    class does not use abc.ABC / ABCMeta, so the decorator neither prevents
+    instantiation nor forces subclasses to override them.
+    """
+    # Sequence of browser-step dicts consumed by iterate_browser_steps()
+    browser_steps = None
+    # Directory that delete_browser_steps_screenshots() cleans step_*.jpeg files from
+    browser_steps_screenshot_path = None
+    content = None
+    error = None
+    fetcher_description = "No description"
+    headers = None
+    status_code = None
+    webdriver_js_execute_code = None
+    xpath_data = None
+    xpath_element_js = ""
+    instock_data = None
+    instock_data_js = ""
+
+    # Will be needed in the future by the VisualSelector, always get this where possible.
+    screenshot = False
+    # Read once, when the class body is executed (i.e. at import time)
+    system_http_proxy = os.getenv('HTTP_PROXY')
+    system_https_proxy = os.getenv('HTTPS_PROXY')
+
+    # Time on top of the system defined env minimum time
+    render_extract_delay = 0
+
+    def __init__(self):
+        """Load the bundled JavaScript helpers into instance attributes."""
+        from pkg_resources import resource_string
+        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
+        self.xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
+        self.instock_data_js = resource_string(__name__, "../res/stock-not-in-stock.js").decode('utf-8')
+
+
+    @abstractmethod
+    def get_error(self):
+        """Return the error recorded by the last run (self.error)."""
+        return self.error
+
+    @abstractmethod
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        """Fetch `url`; the base implementation does nothing.
+
+        Implementations store their results on the instance rather than
+        returning them.
+        """
+        # Should set self.error, self.status_code and self.content
+        pass
+
+    @abstractmethod
+    def quit(self):
+        """Release any resources held by the fetcher; no-op in the base class."""
+        return
+
+    @abstractmethod
+    def get_last_status_code(self):
+        """Return the status code recorded by the last run (self.status_code)."""
+        return self.status_code
+
+    @abstractmethod
+    def screenshot_step(self, step_n):
+        """Hook for saving a screenshot of browser step `step_n`; returns None here."""
+        return None
+
+    @abstractmethod
+    # Return true/false if this checker is ready to run, in the case it needs to do some special config check etc
+    def is_ready(self):
+        return True
+
+    def iterate_browser_steps(self):
+        """Run every usable entry of self.browser_steps against self.page.
+
+        Before and after each step a screenshot and an HTML snapshot are
+        requested through self.screenshot_step() / self.save_step_html().
+
+        NOTE(review): self.page and save_step_html() are not defined on this
+        base class; presumably the browser-based subclass supplies them -
+        confirm before calling this from another fetcher.
+
+        Raises:
+            exceptions.BrowserStepsStepTimout: when a step hits Playwright's
+                TimeoutError; processing stops at that step.
+        """
+        from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
+        from playwright._impl._api_types import TimeoutError
+        from jinja2 import Environment
+        jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
+
+        # 1-based counter of the steps actually executed (skipped steps are not counted)
+        step_n = 0
+
+        if self.browser_steps is not None and len(self.browser_steps):
+            interface = steppable_browser_interface()
+            interface.page = self.page
+
+            # Skip entries with an empty operation and the 'Choose one' / 'Goto site' operations
+            valid_steps = filter(lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), self.browser_steps)
+
+            for step in valid_steps:
+                step_n += 1
+                print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
+                # Capture the state before the action, named "before-<n>"
+                self.screenshot_step("before-"+str(step_n))
+                self.save_step_html("before-"+str(step_n))
+                try:
+                    optional_value = step['optional_value']
+                    selector = step['selector']
+                    # Support for jinja2 template in step values, with date module added
+                    if '{%' in step['optional_value'] or '{{' in step['optional_value']:
+                        optional_value = str(jinja2_env.from_string(step['optional_value']).render())
+                    if '{%' in step['selector'] or '{{' in step['selector']:
+                        selector = str(jinja2_env.from_string(step['selector']).render())
+
+                    getattr(interface, "call_action")(action_name=step['operation'],
+                                                      selector=selector,
+                                                      optional_value=optional_value)
+                    # Capture the state after the action, named "<n>"
+                    self.screenshot_step(step_n)
+                    self.save_step_html(step_n)
+                except TimeoutError:
+                    # Stop processing here
+                    raise exceptions.BrowserStepsStepTimout(step_n=step_n)
+
+
+
+    # It's always good to reset these
+    def delete_browser_steps_screenshots(self):
+        """Delete every step_*.jpeg file in self.browser_steps_screenshot_path, if a path is set.
+
+        Only the .jpeg pattern is globbed; other step_* files in that
+        directory are left in place.
+        """
+        import glob
+        if self.browser_steps_screenshot_path is not None:
+            dest = os.path.join(self.browser_steps_screenshot_path, 'step_*.jpeg')
+            files = glob.glob(dest)
+            for f in files:
+                os.unlink(f)
+
+#   Maybe for the future, each fetcher provides its own diff output, could be used for text, image
+#   the current one would return javascript output (as we use JS to generate the diff)
+#
+
+
+def available_fetchers():
+    """Return a list of (fetcher name, description) tuples that can currently be offered.
+
+    'html_requests' is always listed. 'html_webdriver' is listed at most once:
+    backed by Playwright when PLAYWRIGHT_DRIVER_URL is set, otherwise by the
+    webdriver module when WEBDRIVER_URL is set, otherwise not at all.
+    """
+    from . import playwright, html_requests, webdriver
+
+    # The plain HTTP client needs no external service, so it is always available
+    fetchers = [('html_requests', html_requests.fetcher.fetcher_description)]
+
+    # Prefer playwright when both browser back-ends are configured
+    if os.getenv('PLAYWRIGHT_DRIVER_URL', False):
+        browser_description = playwright.fetcher.fetcher_description
+    elif os.getenv('WEBDRIVER_URL'):
+        browser_description = webdriver.fetcher.fetcher_description
+    else:
+        return fetchers
+
+    fetchers.append(('html_webdriver', browser_description))
+    return fetchers
+
+# Decide which class is the 'real' HTML webdriver. This is a system wide choice
+# (driven by PLAYWRIGHT_DRIVER_URL) rather than a site-specific one: Playwright
+# when the variable is set, the webdriver module's fetcher otherwise.
+html_webdriver = None
+use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
+if not use_playwright_as_chrome_fetcher:
+    from . import webdriver
+    html_webdriver = webdriver.fetcher
+else:
+    from . import playwright
+    html_webdriver = playwright.fetcher
+
@@ -0,0 +1,71 @@
+from . import Fetcher
+import os
+import requests
+
+
+# Exploit the debugging API to get screenshot and HTML without needing playwright
+# https://www.browserless.io/docs/scrape#debugging
+
+class fetcher(Fetcher):
+    """Fetch a page through the Browserless HTTP API instead of driving a browser directly.
+
+    A single JSON POST is sent to the endpoint named by the
+    BROWSERLESS_DRIVER_URL environment variable, asking for the debug
+    screenshot, network log and HTML of the target page.
+    """
+    fetcher_description = "Browserless Chrome/Javascript via '{}'".format(os.getenv("BROWSERLESS_DRIVER_URL"))
+
+    command_executor = ''
+    proxy = None
+
+    def __init__(self, proxy_override=None, command_executor=None):
+        """Remember the optional per-watch proxy.
+
+        NOTE(review): `command_executor` is accepted but not used; the
+        endpoint is always read from BROWSERLESS_DRIVER_URL in run().
+        """
+        super().__init__()
+        self.proxy = proxy_override
+
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        """Ask Browserless to load `url` and store the result on the instance.
+
+        On success sets self.content, self.headers (always empty),
+        self.status_code (when the network log has an inbound entry) and
+        self.screenshot (when one was returned). When Browserless itself does
+        not answer with HTTP 200, self.error is set and nothing else changes.
+
+        Only `url` and `timeout` are used; the remaining parameters exist to
+        match the Fetcher.run() signature.
+        """
+        import json
+
+        endpoint = os.getenv("BROWSERLESS_DRIVER_URL")
+        if self.proxy and endpoint:
+            # --proxy-server is a browser launch flag, so it belongs on the
+            # Browserless endpoint; appending it to the watched URL corrupted
+            # that URL (it always gained a trailing '?').
+            separator = '&' if '?' in endpoint else '?'
+            endpoint = f"{endpoint}{separator}--proxy-server={self.proxy}"
+
+        # NOTE(review): verify=False disables TLS certificate checks for the Browserless endpoint
+        r = requests.request(method='POST',
+                             data=json.dumps({
+                                 "url": url,
+                                 "elements": [],
+                                 "debug": {
+                                     "screenshot": True,
+                                     "console": False,
+                                     "network": True,
+                                     "cookies": False,
+                                     "html": True
+                                 }
+                             }),
+                             url=endpoint,
+                             headers={'Content-Type': 'application/json'},
+                             timeout=timeout,
+                             verify=False)
+
+        # "waitFor": "() => document.querySelector('h1')"
+        #        extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
+        #        self.page.wait_for_timeout(extra_wait * 1000)
+
+        if r.status_code != 200:
+            # Browserless itself failed; record it instead of silently returning no content
+            self.error = "Browserless request failed with HTTP status {}".format(r.status_code)
+            return
+
+        # the basic request to browserless was OK, but how was the internal request to the site?
+        result = r.json()
+
+        inbound = result['debug']['network'].get('inbound')
+        if inbound:
+            # The first inbound entry is taken as the response for the page itself
+            self.status_code = inbound[0]['status']
+
+        self.content = result['debug']['html']
+
+        self.headers = {}
+        if result['debug'].get('screenshot'):
+            import base64
+            self.screenshot = base64.b64decode(result['debug']['screenshot'])
+
+    def is_ready(self):
+        """Return True when BROWSERLESS_DRIVER_URL is configured (no connectivity check is made)."""
+        # Try ping?
+        return bool(os.getenv("BROWSERLESS_DRIVER_URL", False))
@@ -0,0 +1,66 @@
+class Non200ErrorCodeReceived(Exception):
+    """Carries the details of a fetch that ended with a non-200 status code.
+
+    Attributes: status_code, url, screenshot, xpath_data and page_text
+    (text converted from `page_html`, or None when no HTML was supplied).
+    """
+    def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
+        # Kept on the instance so other parts of the app can use them
+        self.url = url
+        self.status_code = status_code
+        self.xpath_data = xpath_data
+        self.screenshot = screenshot
+
+        if page_html:
+            # Imported lazily, only when there is HTML to convert
+            from changedetectionio import html_tools
+            self.page_text = html_tools.html_to_text(page_html)
+        else:
+            self.page_text = None
+
+class checksumFromPreviousCheckWasTheSame(Exception):
+    """Data-less exception; the constructor accepts no arguments."""
+    def __init__(self):
+        # Nothing to store
+        pass
+
+class JSActionExceptions(Exception):
+    """Exception that stores status_code, url, screenshot and an optional message."""
+    def __init__(self, status_code, url, screenshot, message=''):
+        # Kept on the instance for whoever handles the exception
+        self.status_code, self.url = status_code, url
+        self.screenshot, self.message = screenshot, message
+
+class BrowserStepsStepTimout(Exception):
+    """Exception that records which browser step (step_n) timed out."""
+    def __init__(self, step_n):
+        # Number of the step being executed when the timeout happened
+        self.step_n = step_n
+
+
+class PageUnloadable(Exception):
+    """Exception that stores status_code, url, message and an optional screenshot (default False)."""
+    def __init__(self, status_code, url, message, screenshot=False):
+        # Kept on the instance so other parts of the app can use them
+        self.message = message
+        self.screenshot = screenshot
+        self.url = url
+        self.status_code = status_code
+
+class EmptyReply(Exception):
+    """Exception that stores status_code, url and an optional screenshot (default None)."""
+    def __init__(self, status_code, url, screenshot=None):
+        # Kept on the instance so other parts of the app can use them
+        self.status_code, self.url, self.screenshot = status_code, url, screenshot
+
+class ScreenshotUnavailable(Exception):
+    """Exception that stores status_code, url and the text of the page, if HTML was supplied.
+
+    Attributes:
+        status_code: value passed by the raiser (may be None).
+        url: URL that was being fetched.
+        page_text: text converted from `page_html`, or None when no HTML was given.
+    """
+    def __init__(self, status_code, url, page_html=None):
+        # Set this so we can use it in other parts of the app
+        self.status_code = status_code
+        self.url = url
+        # Always define page_text so handlers can read it without an AttributeError
+        self.page_text = None
+        if page_html:
+            from ..html_tools import html_to_text
+            self.page_text = html_to_text(page_html)
+
+class ReplyWithContentButNoText(Exception):
+    """Exception that stores status_code, url and an optional screenshot (default None)."""
+    def __init__(self, status_code, url, screenshot=None):
+        # Kept on the instance so other parts of the app can use them
+        self.screenshot = screenshot
+        self.url = url
+        self.status_code = status_code
@@ -0,0 +1,80 @@
+from . import Fetcher
+from . import exceptions
+
+
+# "html_requests" is listed as the default fetcher in store.py!
+class fetcher(Fetcher):
+    """Plain HTTP fetcher built on the `requests` library (no browser involved)."""
+
+    fetcher_description = "Basic fast Plaintext/HTTP Client"
+
+    def __init__(self, proxy_override=None):
+        # Optional proxy URL; when set, run() uses it instead of the system proxy settings
+        self.proxy_override = proxy_override
+
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        """Fetch `url` once and store the result on the instance.
+
+        Sets `status_code`, `content`, `headers` and `raw_content`. For binary
+        requests `content` is the MD5 hex digest of the body, otherwise it is the
+        decoded text. `current_include_filters` is accepted but not used here.
+
+        Note: a default User-Agent is written into `request_headers` in place
+        when the caller did not supply one.
+
+        Raises:
+            exceptions.EmptyReply: the response body was empty.
+            exceptions.Non200ErrorCodeReceived: status was not 200 and
+                `ignore_status_codes` is false.
+        """
+
+        import chardet
+        import hashlib
+        import os
+        import requests
+
+        # Make requests use a more modern looking user-agent
+        if 'User-Agent' not in request_headers:
+            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
+                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
+
+        # A per-request override wins outright; otherwise use whichever system proxies are set
+        if self.proxy_override:
+            proxies = dict.fromkeys(('http', 'https', 'ftp'), self.proxy_override)
+        else:
+            proxies = {}
+            if self.system_http_proxy:
+                proxies['http'] = self.system_http_proxy
+            if self.system_https_proxy:
+                proxies['https'] = self.system_https_proxy
+
+        # Certificate verification is switched off for this request (verify=False)
+        r = requests.request(method=request_method,
+                             data=request_body,
+                             url=url,
+                             headers=request_headers,
+                             timeout=timeout,
+                             proxies=proxies,
+                             verify=False)
+
+        # If the response did not tell us what encoding format to expect, then use chardet to override what `requests` thinks.
+        # For example - some sites don't tell us it's utf-8, but return utf-8 content
+        # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
+        # https://github.com/psf/requests/issues/1604 good info about requests encoding detection
+        # Don't run this for PDF (and requests identified as binary), it takes a _long_ time
+        if not is_binary:
+            declared_type = r.headers.get('content-type')
+            if not declared_type or 'charset=' not in declared_type:
+                detected = chardet.detect(r.content)['encoding']
+                if detected:
+                    r.encoding = detected
+
+        if not r.content:
+            raise exceptions.EmptyReply(url=url, status_code=r.status_code)
+
+        # @todo test this
+        # @todo maybe you really want to test zero-byte return pages?
+        if not ignore_status_codes and r.status_code != 200:
+            # maybe check with content works?
+            raise exceptions.Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)
+
+        self.status_code = r.status_code
+        # Binary files just return their checksum until we add something smarter
+        self.content = hashlib.md5(r.content).hexdigest() if is_binary else r.text
+
+        self.headers = r.headers
+        self.raw_content = r.content
@@ -0,0 +1,208 @@
+from . import Fetcher
+from . import exceptions
+from . import visualselector_xpath_selectors
+
+import os
+import logging
+import time
+
+class fetcher(Fetcher):
+    """Fetch pages through a remote browser driven by Playwright.
+
+    The browser is reached over CDP at `command_executor` (env
+    PLAYWRIGHT_DRIVER_URL); a fresh context and page are created for every run()
+    and are always closed again when run() leaves, whether it succeeds or raises.
+    """
+
+    fetcher_description = "Playwright {}/Javascript".format(
+        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
+    )
+    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
+        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
+
+    browser_type = ''
+    command_executor = ''
+
+    # Configs for Proxy setup
+    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
+    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
+
+    proxy = None
+
+    def __init__(self, proxy_override=None):
+        """Read browser type, driver URL and proxy settings from the environment.
+
+        `proxy_override`, when given, replaces any environment proxy settings
+        with a single `server` entry.
+        """
+        super().__init__()
+
+        # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
+        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
+        self.command_executor = os.getenv(
+            "PLAYWRIGHT_DRIVER_URL",
+            'ws://playwright-chrome:3000'
+        ).strip('"')
+
+        # If any proxy settings are enabled, then we should setup the proxy object
+        proxy_args = {}
+        for k in self.playwright_proxy_settings_mappings:
+            v = os.getenv('playwright_proxy_' + k, False)
+            if v:
+                proxy_args[k] = v.strip('"')
+
+        if proxy_args:
+            self.proxy = proxy_args
+
+        # allow per-watch proxy selection override
+        if proxy_override:
+            self.proxy = {'server': proxy_override}
+
+        if self.proxy:
+            # Playwright needs separate username and password values
+            from urllib.parse import urlparse
+            # 'server' may be absent when only other proxy env vars were set
+            parsed = urlparse(self.proxy.get('server') or '')
+            if parsed.username:
+                self.proxy['username'] = parsed.username
+                self.proxy['password'] = parsed.password
+
+    def screenshot_step(self, step_n=''):
+        """Take a full-page JPEG of the current page and save it as step_<step_n>.jpeg.
+
+        Nothing is written when no browser-steps screenshot path is configured.
+        """
+        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
+
+        if self.browser_steps_screenshot_path is not None:
+            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
+            logging.debug("Saving step screenshot to {}".format(destination))
+            with open(destination, 'wb') as f:
+                f.write(screenshot)
+
+    def save_step_html(self, step_n):
+        """Save the current page HTML as step_<step_n>.html.
+
+        Like screenshot_step(), nothing is written when no browser-steps
+        screenshot path is configured.
+        """
+        content = self.page.content()
+
+        if self.browser_steps_screenshot_path is not None:
+            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
+            logging.debug("Saving step HTML to {}".format(destination))
+            # Explicit encoding: page content is arbitrary Unicode and the platform default may not cope
+            with open(destination, 'w', encoding='utf-8') as f:
+                f.write(content)
+
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        """Load `url` in the remote browser and store the results on the instance.
+
+        Sets `content`, `status_code`, `headers`, `xpath_data`, `instock_data`
+        and `screenshot`. `timeout`, `request_body`, `request_method`,
+        `ignore_status_codes` and `is_binary` are accepted but not used here.
+
+        Raises:
+            exceptions.PageUnloadable: navigation or the custom JS step failed.
+            exceptions.EmptyReply: no response object, or the page content was empty.
+            exceptions.ScreenshotUnavailable: the final screenshot could not be taken.
+        """
+
+        from playwright.sync_api import sync_playwright
+        import playwright._impl._api_types
+        import json
+
+        self.delete_browser_steps_screenshots()
+        response = None
+        with sync_playwright() as p:
+            browser_type = getattr(p, self.browser_type)
+
+            # Seemed to cause a connection Exception even tho I can see it connect
+            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
+            # 60,000 connection timeout only
+            browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)
+            context = None
+
+            # Single cleanup point: every exit path below (return or raise) closes the
+            # context and the browser exactly once, and never before we are done with the page.
+            try:
+                # Set user agent to prevent Cloudflare from blocking the browser
+                # Use the default one configured in the App.py model that's passed from fetch_site_status.py
+                context = browser.new_context(
+                    user_agent=request_headers.get('User-Agent') or 'Mozilla/5.0',
+                    proxy=self.proxy,
+                    # This is needed to enable JavaScript execution on GitHub and others
+                    bypass_csp=True,
+                    # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
+                    service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
+                    # Should never be needed
+                    accept_downloads=False
+                )
+
+                self.page = context.new_page()
+                if len(request_headers):
+                    context.set_extra_http_headers(request_headers)
+
+                # Timeouts and console logging apply to every page, with or without extra headers
+                self.page.set_default_navigation_timeout(90000)
+                self.page.set_default_timeout(90000)
+
+                # Listen for all console events and handle errors
+                self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
+
+                # Goto page
+                try:
+                    # Wait_until = commit
+                    # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
+                    # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
+                    # This seemed to solve nearly all 'TimeoutErrors'
+                    response = self.page.goto(url, wait_until='commit')
+                except playwright._impl._api_types.Error as e:
+                    # Retry once - https://github.com/browserless/chrome/issues/2485
+                    # Sometimes errors related to invalid cert's and other can be random
+                    print ("Content Fetcher > retrying request got error - ", str(e))
+                    time.sleep(1)
+                    # If the retry fails too, its exception propagates unchanged
+                    response = self.page.goto(url, wait_until='commit')
+                except Exception as e:
+                    print ("Content Fetcher > Other exception when page.goto", str(e))
+                    raise exceptions.PageUnloadable(url=url, status_code=None, message=str(e))
+
+                # Give the page time to render, then run any custom JavaScript
+                try:
+                    extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
+                    self.page.wait_for_timeout(extra_wait * 1000)
+
+                    if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
+                        self.page.evaluate(self.webdriver_js_execute_code)
+
+                except playwright._impl._api_types.TimeoutError as e:
+                    # This can be ok, we will try to grab what we could retrieve.
+                    # The browser must stay open here because the page is still used below.
+                    pass
+                except Exception as e:
+                    print ("Content Fetcher > Other exception when executing custom JS code", str(e))
+                    raise exceptions.PageUnloadable(url=url, status_code=None, message=str(e))
+
+                if response is None:
+                    print ("Content Fetcher > Response object was none")
+                    raise exceptions.EmptyReply(url=url, status_code=None)
+
+                # Run Browser Steps here
+                self.iterate_browser_steps()
+
+                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
+                time.sleep(extra_wait)
+
+                self.content = self.page.content()
+                self.status_code = response.status
+                if len(self.content.strip()) == 0:
+                    print ("Content Fetcher > Content was empty")
+                    raise exceptions.EmptyReply(url=url, status_code=response.status)
+
+                self.headers = response.all_headers()
+
+                # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
+                if current_include_filters is not None:
+                    self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
+                else:
+                    self.page.evaluate("var include_filters=''")
+
+                self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
+                self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
+
+                # Bug 3 in Playwright screenshot handling
+                # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
+                # JPEG is better here because the screenshots can be very very large
+
+                # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
+                # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
+                # acceptable screenshot quality here
+                try:
+                    # The actual screenshot
+                    self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
+                except Exception as e:
+                    raise exceptions.ScreenshotUnavailable(url=url, status_code=None)
+
+            finally:
+                if context is not None:
+                    context.close()
+                browser.close()
@@ -0,0 +1,103 @@
+from . import Fetcher
+import os
+import time
+
+class fetcher(Fetcher):
+    """Fetch pages through a remote Selenium WebDriver (Chrome capabilities).
+
+    The driver endpoint comes from the `command_executor` argument or the
+    WEBDRIVER_URL environment variable.
+    """
+
+    if os.getenv("WEBDRIVER_URL"):
+        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
+    else:
+        fetcher_description = "WebDriver Chrome/Javascript"
+
+    command_executor = ''
+
+    # Configs for Proxy setup
+    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
+    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
+                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
+                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
+    proxy = None
+
+    def __init__(self, proxy_override=None, command_executor=None):
+        """Resolve the driver URL and build the Selenium proxy object, if any.
+
+        Proxy precedence for HTTP: `proxy_override`, then `webdriver_httpProxy`,
+        then the system HTTP proxy. For HTTPS: `webdriver_sslProxy`, then the
+        system HTTPS proxy.
+        """
+        super().__init__()
+        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
+
+        # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
+        if command_executor:
+            self.command_executor = command_executor
+        else:
+            self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
+
+        # If any proxy settings are enabled, then we should setup the proxy object
+        proxy_args = {}
+        for k in self.selenium_proxy_settings_mappings:
+            v = os.getenv('webdriver_' + k, False)
+            if v:
+                proxy_args[k] = v.strip('"')
+
+        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy.
+        # proxy_args is keyed by the bare Selenium names (the 'webdriver_' prefix exists only on
+        # the env vars), so explicit webdriver_* settings are looked up - and kept - under those names.
+        if not proxy_args.get('httpProxy') and self.system_http_proxy:
+            proxy_args['httpProxy'] = self.system_http_proxy
+        if not proxy_args.get('sslProxy') and self.system_https_proxy:
+            proxy_args['sslProxy'] = self.system_https_proxy
+
+        # Allows override the proxy on a per-request basis
+        if proxy_override is not None:
+            proxy_args['httpProxy'] = proxy_override
+
+        if proxy_args:
+            self.proxy = SeleniumProxy(raw=proxy_args)
+
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        """Load `url` in a new remote driver session and store the results on the instance.
+
+        Sets `driver`, `status_code` (always 200, the real code is not available
+        here), `content`, `headers` (always empty) and `screenshot` (PNG bytes).
+        The driver session is quit here only when the initial page load fails.
+
+        Raises:
+            selenium.common.exceptions.WebDriverException: the page load failed.
+        """
+
+        from selenium import webdriver
+        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+        from selenium.common.exceptions import WebDriverException
+        # request_body, request_method unused for now, until some magic in the future happens.
+
+        # check env for WEBDRIVER_URL
+        self.driver = webdriver.Remote(
+            command_executor=self.command_executor,
+            desired_capabilities=DesiredCapabilities.CHROME,
+            proxy=self.proxy
+        )
+
+        try:
+            self.driver.get(url)
+        except WebDriverException:
+            # Be sure we close the session window
+            self.quit()
+            raise
+
+        self.driver.set_window_size(1280, 1024)
+        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+
+        if self.webdriver_js_execute_code is not None:
+            self.driver.execute_script(self.webdriver_js_execute_code)
+            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
+            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+
+        # @todo - how to check this? is it possible?
+        self.status_code = 200
+        # @todo somehow we should try to get this working for WebDriver
+        # raise EmptyReply(url=url, status_code=r.status_code)
+
+        # @todo - dom wait loaded?
+        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
+        self.content = self.driver.page_source
+        self.headers = {}
+
+        self.screenshot = self.driver.get_screenshot_as_png()
+
+    # Try something with requests?
+    def is_ready(self):
+        """Report readiness; currently always True (no actual probe is made)."""
+        return True
+
+
@@ -21,7 +21,6 @@ from wtforms.validators import ValidationError
 # each select <option data-enabled="enabled-0-0"
 from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config

-from changedetectionio import content_fetcher
 from changedetectionio.notification import (
    valid_notification_formats,
 )
@@ -135,30 +134,31 @@ class ValidateContentFetcherIsReady(object):

    def __call__(self, form, field):
        import urllib3.exceptions
-        from changedetectionio import content_fetcher
+        import importlib

        # Better would be a radiohandler that keeps a reference to each class
        if field.data is not None and field.data != 'system':
-            klass = getattr(content_fetcher, field.data)
-            some_object = klass()
-            try:
-                ready = some_object.is_ready()
+            from . import fetchers
+            if fetchers.html_webdriver is not None:
+                try:
+                    driver = fetchers.html_webdriver()
+                    driver.is_ready()

-            except urllib3.exceptions.MaxRetryError as e:
-                driver_url = some_object.command_executor
-                message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data))
-                message += '<br/>' + field.gettext(
-                    'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.')
-                message += '<br/>' + field.gettext('Did you follow the instructions in the wiki?')
-                message += '<br/><br/>' + field.gettext('WebDriver Host: %s' % (driver_url))
-                message += '<br/><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
-                message += '<br/>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))
+                except urllib3.exceptions.MaxRetryError as e:
+                    driver_url = fetchers.html_webdriver.command_executor
+                    message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data))
+                    message += '<br>' + field.gettext(
+                        'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.')
+                    message += '<br>' + field.gettext('Did you follow the instructions in the wiki?')
+                    message += '<br><br>' + field.gettext('WebDriver Host: %s' % (driver_url))
+                    message += '<br><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
+                    message += '<br>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))

-                raise ValidationError(message)
+                    raise ValidationError(message)

-            except Exception as e:
-                message = field.gettext('Content fetcher \'%s\' did not respond properly, unable to use it.\n %s')
-                raise ValidationError(message % (field.data, e))
+                except Exception as e:
+                    message = field.gettext('Content fetcher \'%s\' did not respond properly, unable to use it.\n %s')
+                    raise ValidationError(message % (field.data, e))


 class ValidateNotificationBodyAndTitleWhenURLisSet(object):
@@ -344,23 +344,30 @@ class ValidateCSSJSONXPATHInput(object):
                    raise ValidationError("A system-error occurred when validating your jq expression")

 class quickWatchForm(Form):
+    from . import processors
+
    url = fields.URLField('URL', validators=[validateURL()])
    tag = StringField('Group tag', [validators.Optional()])
    watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
+    processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
    edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})


-
 # Common to a single watch and the global settings
 class commonSettingsForm(Form):
+    from .fetchers import available_fetchers
    notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers()])
    notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
    notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
    notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
-    fetch_backend = RadioField(u'Fetch Method', choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
+    fetch_backend = RadioField(u'Fetch Method', choices=available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
    webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1,
                                                                                                                                    message="Should contain one or more seconds")])
+class importForm(Form):
+    # Form with a processor choice and a free-text area of URLs.
+    # NOTE(review): presumably backs the bulk URL import page - confirm against the view that uses it.
+    from . import processors
+    # Radio options come from processors.available_processors(); "text_json_diff" is preselected
+    processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
+    # Multi-line text input labelled 'URLs'
+    urls = TextAreaField('URLs')

 class SingleBrowserStep(Form):

@@ -393,11 +400,19 @@ class watchForm(commonSettingsForm):
    body = TextAreaField('Request body', [validators.Optional()])
    method = SelectField('Request method', choices=valid_method, default=default_method)
    ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
-    check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
+    check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
+
+    filter_text_added = BooleanField('Added lines', default=True)
+    filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
+    filter_text_removed = BooleanField('Removed lines', default=True)
+
+    # @todo this class could be moved to its own text_json_diff_watchForm and this goes to restock_diff_Watchform perhaps
+    in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True)
+
    trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
        browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
-    text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
+    text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
    webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
@@ -458,11 +473,11 @@ class globalSettingsRequestForm(Form):

 # datastore.data['settings']['application']..
 class globalSettingsApplicationForm(commonSettingsForm):
-
+    from .fetchers import available_fetchers
    api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
    base_url = StringField('Base URL', validators=[validators.Optional()])
    empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False)
-    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
+    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
    ignore_whitespace = BooleanField('Ignore whitespace')
@@ -8,7 +8,7 @@ import json
 import re

 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
-TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"
+TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"

 # 'price' , 'lowPrice', 'highPrice' are usually under here
 # all of those may or may not appear on different websites
@@ -287,3 +287,18 @@ def workarounds_for_obfuscations(content):
    content = re.sub('<!--\s+-->', '', content)

    return content
+
+
+def get_triggered_text(content, trigger_text):
+    """Return the lines of `content` selected by `trigger_text`.
+
+    Line selection is delegated to strip_ignore_text() in "line numbers" mode;
+    every line of `content` whose 1-based position is in that result is
+    returned, in document order.
+    """
+    matched_line_numbers = strip_ignore_text(content=content,
+                                             wordlist=trigger_text,
+                                             mode="line numbers")
+
+    # Positions are compared 1-based, hence start=1
+    return [
+        line
+        for line_no, line in enumerate(content.splitlines(), start=1)
+        if line_no in matched_line_numbers
+    ]
@@ -29,6 +29,7 @@ class import_url_list(Importer):
            data,
            flash,
            datastore,
+            processor=None
            ):

        urls = data.split("\n")
@@ -52,7 +53,11 @@ class import_url_list(Importer):
            # Flask wtform validators wont work with basic auth, use validators package
            # Up to 5000 per batch so we dont flood the server
            if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
-                new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False)
+                extras = None
+                if processor:
+                    extras = {'processor': processor}
+                new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False, extras=extras)
+
                if new_uuid:
                    # Straight into the queue.
                    self.new_uuids.append(new_uuid)
@@ -23,12 +23,17 @@ base_config = {
    'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
    'extract_text': [],  # Extract text by regex after filters
    'extract_title_as_title': False,
-    'fetch_backend': 'system',
+    'fetch_backend': 'system', # plaintext, playwright etc
+    'processor': 'text_json_diff', # could be restock_diff or others from .processors
    'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
+    'filter_text_added': True,
+    'filter_text_replaced': True,
+    'filter_text_removed': True,
    'has_ldjson_price_data': None,
    'track_ldjson_price_data': None,
    'headers': {},  # Extra headers to send
    'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
+    'in_stock_only' : True, # Only trigger change on going to instock from out-of-stock
    'include_filters': [],
    'last_checked': 0,
    'last_error': False,
@@ -239,9 +244,32 @@ class model(dict):
        bump = self.history
        return self.__newest_history_key

+    def get_history_snapshot(self, timestamp):
+        """Return the text of the snapshot that self.history records for `timestamp`.
+
+        The recorded path may point at either the plain file or its brotli
+        ('.br') sibling; whichever of the two actually exists on disk is read,
+        preferring the compressed one. Undecodable bytes are dropped in both
+        cases.
+
+        Raises:
+            KeyError: `timestamp` is not in self.history.
+            OSError: neither variant of the file can be opened.
+        """
+        compressed_suffix = '.br'
+        filepath = self.history[timestamp]
+
+        # See if a brotli versions exists and switch to that
+        if not filepath.endswith(compressed_suffix) and os.path.isfile(filepath + compressed_suffix):
+            filepath = filepath + compressed_suffix
+
+        # OR in the backup case that the .br does not exist, but the plain one does
+        if filepath.endswith(compressed_suffix) and not os.path.isfile(filepath):
+            # Strip only the trailing suffix - str.replace() would also eat any '.br'
+            # occurring earlier in the path (e.g. a directory called 'my.brand')
+            plain_path = filepath[:-len(compressed_suffix)]
+            if os.path.isfile(plain_path):
+                filepath = plain_path
+
+        if filepath.endswith(compressed_suffix):
+            # Brotli doesnt have a fileheader to detect it, so we rely on filename
+            # https://www.rfc-editor.org/rfc/rfc7932
+            # Imported only when needed so plain snapshots stay readable without the brotli module
+            import brotli
+            with open(filepath, 'rb') as f:
+                # errors='ignore' matches how plain snapshots are read below
+                return brotli.decompress(f.read()).decode('utf-8', errors='ignore')
+
+        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+            return f.read()
+
    # Save some text file to the appropriate path and bump the history
    # result_obj from fetch_site_status.run()
    def save_history_text(self, contents, timestamp, snapshot_id):
+        import brotli

        self.ensure_data_dir_exists()

@@ -250,16 +278,21 @@ class model(dict):
        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
            time.sleep(timestamp - self.__newest_history_key)

-        snapshot_fname = f"{snapshot_id}.txt"
+        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
+        skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))

-        # Only write if it does not exist, this is so that we dont bother re-saving the same data by checksum under different filenames.
-        dest = os.path.join(self.watch_data_dir, snapshot_fname)
-        if not os.path.exists(dest):
-            # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
-            # most sites are utf-8 and some are even broken utf-8
-            with open(dest, 'wb') as f:
-                f.write(contents)
-                f.close()
+        if not skip_brotli and len(contents) > threshold:
+            snapshot_fname = f"{snapshot_id}.txt.br"
+            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            if not os.path.exists(dest):
+                with open(dest, 'wb') as f:
+                    f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
+        else:
+            snapshot_fname = f"{snapshot_id}.txt"
+            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            if not os.path.exists(dest):
+                with open(dest, 'wb') as f:
+                    f.write(contents)

        # Append to index
        # @todo check last char was \n
@@ -296,7 +329,8 @@ class model(dict):
        # Compare each lines (set) against each history text file (set) looking for something new..
        existing_history = set({})
        for k, v in self.history.items():
-            alist = set([line.decode('utf-8').strip().lower() for line in open(v, 'rb')])
+            content = self.get_history_snapshot(k)
+            alist = set([line.strip().lower() for line in content.splitlines()])
            existing_history = existing_history.union(alist)

        # Check that everything in local_lines(new stuff) already exists in existing_history - it should
@@ -311,17 +345,6 @@ class model(dict):
        # False is not an option for AppRise, must be type None
        return None

-    def get_screenshot_as_jpeg(self):
-
-        # Created by save_screenshot()
-        fname = os.path.join(self.watch_data_dir, "last-screenshot.jpg")
-        if os.path.isfile(fname):
-            return fname
-
-        # False is not an option for AppRise, must be type None
-        return None
-
-
    def __get_file_ctime(self, filename):
        fname = os.path.join(self.watch_data_dir, filename)
        if os.path.isfile(fname):
@@ -368,6 +391,7 @@ class model(dict):
            return fname
        return False

+
    def pause(self):
        self['paused'] = True

@@ -397,8 +421,8 @@ class model(dict):
        # self.history will be keyed with the full path
        for k, fname in self.history.items():
            if os.path.isfile(fname):
-                with open(fname, "r") as f:
-                    contents = f.read()
+                if True:
+                    contents = self.get_history_snapshot(k)
                    res = re.findall(regex, contents, re.MULTILINE)
                    if res:
                        if not csv_writer:
@@ -434,3 +458,38 @@ class model(dict):
    # Return list of tags, stripped and lowercase, used for searching
    def all_tags(self):
        return [s.strip().lower() for s in self.get('tag','').split(',')]
+
+    def has_special_diff_filter_options_set(self):
+        """Tell whether the added/replaced/removed text filters are in a non-default, usable state.
+
+        Each of the three options defaults to True when it is not set.
+        Returns True only when at least one option is switched off while at least
+        one other is still on. Returns False when all three are on (nothing special
+        is configured) and also when all three are off (nothing would be done).
+        """
+        option_states = [
+            self.get(option_key, True)
+            for option_key in ('filter_text_added', 'filter_text_replaced', 'filter_text_removed')
+        ]
+
+        # A mix of on and off is the only combination that counts as "special"
+        return any(option_states) and not all(option_states)
+
+
+    def get_last_fetched_before_filters(self):
+        """Return the text stored in 'last-fetched.br' inside the watch data directory.
+
+        The file is brotli-decompressed and decoded as UTF-8. When the file does
+        not exist, the snapshot for the last key of self.history is returned
+        instead, or an empty string when there is no history at all.
+        """
+        import brotli
+        filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
+
+        if os.path.isfile(filepath):
+            with open(filepath, 'rb') as f:
+                compressed = f.read()
+            return brotli.decompress(compressed).decode('utf-8')
+
+        # If a previous attempt doesn't exist yet, fall back to the previous snapshot
+        dates = list(self.history.keys())
+        if not dates:
+            return ''
+        return self.get_history_snapshot(dates[-1])
+
+    def save_last_fetched_before_filters(self, contents):
+        """Brotli-compress `contents` (text mode) and write it to 'last-fetched.br' in the watch data directory.
+
+        `contents` is handed to brotli.compress() unchanged, so it is presumably
+        expected to be bytes already - confirm against callers.
+        """
+        import brotli
+        filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
+        with open(filepath, 'wb') as f:
+            compressed = brotli.compress(contents, mode=brotli.MODE_TEXT)
+            f.write(compressed)
@@ -5,15 +5,18 @@ import json

 valid_tokens = {
    'base_url': '',
-    'watch_url': '',
-    'watch_uuid': '',
-    'watch_title': '',
-    'watch_tag': '',
+    'current_snapshot': '',
    'diff': '',
+    'diff_added': '',
    'diff_full': '',
+    'diff_removed': '',
    'diff_url': '',
    'preview_url': '',
-    'current_snapshot': ''
+    'triggered_text': '',
+    'watch_tag': '',
+    'watch_title': '',
+    'watch_url': '',
+    'watch_uuid': '',
 }

 default_notification_format_for_watch = 'System default'
@@ -120,10 +123,10 @@ def process_notification(n_object, datastore):
                    url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'

                if url.startswith('tgram://'):
-                    # Telegram only supports a limit subset of HTML, remove the '<br/>' we place in.
+                    # Telegram only supports a limited subset of HTML, remove the '<br>' we place in.
                    # re https://github.com/dgtlmoon/changedetection.io/issues/555
                    # @todo re-use an existing library we have already imported to strip all non-allowed tags
-                    n_body = n_body.replace('<br/>', '\n')
+                    n_body = n_body.replace('<br>', '\n')
                    n_body = n_body.replace('</br>', '\n')
                    # real limit is 4096, but minus some for extra metadata
                    payload_max_size = 3600
@@ -209,15 +212,18 @@ def create_notification_parameters(n_object, datastore):
    tokens.update(
        {
            'base_url': base_url if base_url is not None else '',
+            'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else '',
+            'diff': n_object.get('diff', ''),  # Null default in the case we use a test
+            'diff_added': n_object.get('diff_added', ''),  # Null default in the case we use a test
+            'diff_full': n_object.get('diff_full', ''),  # Null default in the case we use a test
+            'diff_removed': n_object.get('diff_removed', ''),  # Null default in the case we use a test
+            'diff_url': diff_url,
+            'preview_url': preview_url,
+            'triggered_text': n_object.get('triggered_text', ''),
+            'watch_tag': watch_tag if watch_tag is not None else '',
+            'watch_title': watch_title if watch_title is not None else '',
            'watch_url': watch_url,
            'watch_uuid': uuid,
-            'watch_title': watch_title if watch_title is not None else '',
-            'watch_tag': watch_tag if watch_tag is not None else '',
-            'diff_url': diff_url,
-            'diff': n_object.get('diff', ''),  # Null default in the case we use a test
-            'diff_full': n_object.get('diff_full', ''),  # Null default in the case we use a test
-            'preview_url': preview_url,
-            'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else ''
        })

    return tokens
@@ -0,0 +1,11 @@
+# Change detection post-processors
+
+The concept here is to be able to switch between processors, each of which solves a different domain-specific problem.
+
+- `text_json_diff` The traditional text and JSON comparison handler
+- `restock_diff` Only cares about detecting if a product looks like it has some text that suggests that it's out of stock, otherwise assumes that it's in stock.
+
+Some suggestions for the future
+
+- `graphical` 
+- `restock_and_price` - extract price AND stock text
@@ -0,0 +1,24 @@
+from abc import abstractmethod
+import hashlib
+
+
+class difference_detection_processor():
+    """Base class that the change-detection processors derive from.
+
+    Subclasses are expected to provide their own run(). Note that the
+    @abstractmethod marker is not enforced here, because this class does not
+    use abc.ABCMeta: the base class can still be instantiated and its
+    placeholder run() called.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @abstractmethod
+    def run(self, uuid, skip_when_checksum_same=True):
+        """Placeholder showing the shape of a processor result.
+
+        Returns a tuple of (changed_detected, update_obj, contents as bytes).
+        Here that is always False, a dict with the error flags cleared plus the
+        MD5 of a dummy string, and empty bytes.
+        """
+        placeholder_md5 = hashlib.md5('xxxxx'.encode('utf-8')).hexdigest()
+        update_obj = {
+            'last_notification_error': False,
+            'last_error': False,
+            'previous_md5': placeholder_md5,
+        }
+        return False, update_obj, b''
+
+
+def available_processors():
+    """Return a list of (processor module name, processor display name) tuples."""
+    from . import restock_diff, text_json_diff
+    # @todo Make this smarter with introspection of sorts.
+    return [
+        ('text_json_diff', text_json_diff.name),
+        ('restock_diff', restock_diff.name),
+    ]
@@ -0,0 +1,126 @@
+
+import hashlib
+import os
+import re
+import urllib3
+from . import difference_detection_processor
+from copy import deepcopy
+from .. import fetchers
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+name = 'Re-stock detection for single product pages'
+description = 'Detects if the product goes back to in-stock'
+
+class perform_site_check(difference_detection_processor):
+    """Re-stock processor.
+
+    Fetches the watched page with the webdriver fetcher and reports a change
+    when the stock text returned by the fetcher differs from the previous check.
+    """
+    # Both are set by run() from the fetcher once a fetch has completed
+    screenshot = None
+    xpath_data = None
+
+    def __init__(self, *args, datastore, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.datastore = datastore
+
+    def run(self, uuid, skip_when_checksum_same=True):
+        """Fetch the watch `uuid` and decide whether its stock state changed.
+
+        `skip_when_checksum_same` is accepted to keep the same signature as the
+        base class; this processor does not use it.
+
+        Returns a tuple (changed_detected, update_obj, stock text as UTF-8 bytes).
+        The bytes are empty when the fetcher reported no stock text.
+
+        Raises Exception when the watch no longer exists, when a file:// URL is
+        used without ALLOW_FILE_URI being set, or when the selected fetch backend
+        is not 'html_webdriver'.
+        """
+
+        # DeepCopy so we can be sure we don't accidentally change anything by reference
+        watch = deepcopy(self.datastore.data['watching'].get(uuid))
+
+        if not watch:
+            raise Exception("Watch no longer exists.")
+
+        # Protect against file:// access
+        if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
+            raise Exception(
+                "file:// type access is denied for security reasons."
+            )
+
+        # Unset any existing notification error
+        update_obj = {'last_notification_error': False, 'last_error': False}
+        extra_headers = watch.get('headers', [])
+
+        # Tweak the base config with the per-watch ones
+        request_headers = deepcopy(self.datastore.data['settings']['headers'])
+        request_headers.update(extra_headers)
+
+        # https://github.com/psf/requests/issues/4525
+        # Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
+        # do this by accident.
+        if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
+            request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
+
+        timeout = self.datastore.data['settings']['requests'].get('timeout')
+
+        url = watch.link
+
+        request_body = self.datastore.data['watching'][uuid].get('body')
+        request_method = self.datastore.data['watching'][uuid].get('method')
+        ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
+
+        # Pluggable content fetcher
+        prefer_backend = watch.get_fetch_backend
+        if not prefer_backend or prefer_backend == 'system':
+            prefer_backend = self.datastore.data['settings']['application']['fetch_backend']
+
+        # Checked before any fetcher is created, so no fetcher is built only to be rejected.
+        # Could be removed if requests/plaintext could also return some info?
+        if prefer_backend != 'html_webdriver':
+            raise Exception("Re-stock detection requires Chrome or compatible webdriver/playwright fetcher to work")
+
+        preferred_fetcher = fetchers.html_webdriver
+
+        proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
+        proxy_url = None
+        if proxy_id:
+            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
+            print("UUID {} Using proxy {}".format(uuid, proxy_url))
+
+        fetcher = preferred_fetcher(proxy_override=proxy_url)
+
+        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
+        # .get() so that a watch without the key falls through to the system setting instead of raising KeyError
+        watch_webdriver_delay = watch.get('webdriver_delay')
+        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
+        if watch_webdriver_delay is not None:
+            fetcher.render_extract_delay = watch_webdriver_delay
+        elif system_webdriver_delay is not None:
+            fetcher.render_extract_delay = system_webdriver_delay
+
+        js_execute_code = watch.get('webdriver_js_execute_code')
+        if js_execute_code is not None and js_execute_code.strip():
+            fetcher.webdriver_js_execute_code = js_execute_code
+
+        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
+        fetcher.quit()
+
+        self.screenshot = fetcher.screenshot
+        self.xpath_data = fetcher.xpath_data
+
+        # Track the content type
+        update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
+        update_obj["last_check_status"] = fetcher.get_last_status_code()
+
+        # Main detection method
+        instock_data = fetcher.instock_data
+        fetched_md5 = None
+        if instock_data:
+            fetched_md5 = hashlib.md5(instock_data.encode('utf-8')).hexdigest()
+            # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
+            update_obj["in_stock"] = instock_data == 'Possibly in stock'
+
+        # The main thing that all this at the moment comes down to :)
+        changed_detected = False
+
+        if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
+            # Either every change counts, or we only care about it going to in-stock AND we are in stock.
+            # "in_stock" is absent from update_obj when the fetcher returned no stock text, hence .get()
+            if not watch.get('in_stock_only') or update_obj.get("in_stock"):
+                changed_detected = True
+
+        # Always record the new checksum
+        update_obj["previous_md5"] = fetched_md5
+
+        # instock_data may be None/empty when the fetcher found nothing; return empty bytes rather than crash
+        return changed_detected, update_obj, (instock_data or '').encode('utf-8')
@@ -1,3 +1,5 @@
+# HTML to TEXT/JSON DIFFERENCE FETCHER
+
 import hashlib
 import json
 import logging
@@ -5,13 +7,18 @@ import os
 import re
 import urllib3

-from changedetectionio import content_fetcher, html_tools
+from changedetectionio import html_tools
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from copy import deepcopy
+from . import difference_detection_processor
+from .. import fetchers

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


+name =  'Webpage Text/HTML, JSON and PDF changes'
+description = 'Detects all text changes where possible'
+
 class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)
@@ -23,7 +30,7 @@ class PDFToHTMLToolNotFound(ValueError):

 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
-class perform_site_check():
+class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None

@@ -53,7 +60,7 @@ class perform_site_check():
        watch = deepcopy(self.datastore.data['watching'].get(uuid))

        if not watch:
-            return
+            raise Exception("Watch no longer exists.")

        # Protect against file:// access
        if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
@@ -95,11 +102,12 @@ class perform_site_check():
        if not prefer_backend or prefer_backend == 'system':
            prefer_backend = self.datastore.data['settings']['application']['fetch_backend']

-        if hasattr(content_fetcher, prefer_backend):
-            klass = getattr(content_fetcher, prefer_backend)
+        if prefer_backend == 'html_webdriver':
+            preferred_fetcher = fetchers.html_webdriver
        else:
-            # If the klass doesnt exist, just use a default
-            klass = getattr(content_fetcher, "html_requests")
+            from ..fetchers import html_requests
+            preferred_fetcher = html_requests
+

        proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
        proxy_url = None
@@ -107,7 +115,7 @@ class perform_site_check():
            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
            print("UUID {} Using proxy {}".format(uuid, proxy_url))

-        fetcher = klass(proxy_override=proxy_url)
+        fetcher = preferred_fetcher(proxy_override=proxy_url)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
@@ -141,7 +149,7 @@ class perform_site_check():
        update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
        if skip_when_checksum_same:
            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
-                raise content_fetcher.checksumFromPreviousCheckWasTheSame()
+                raise fetchers.exceptions.checksumFromPreviousCheckWasTheSame()


        # Fetching complete, now filters
@@ -273,10 +281,38 @@ class perform_site_check():
        # Re #340 - return the content before the 'ignore text' was applied
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

+
+        # @todo whitespace coming from missing rtrim()?
+        # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
+        # Rewrites the processing text based on only what diff result they want to see
+        if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
+            # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
+            from .. import diff
+            # needs to not include (added) etc or it may get used twice
+            # Replace the processed text with the preferred result
+            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
+                                                       newest_version_file_contents=stripped_text_from_html,
+                                                       include_equal=False,  # not the same lines
+                                                       include_added=watch.get('filter_text_added', True),
+                                                       include_removed=watch.get('filter_text_removed', True),
+                                                       include_replaced=watch.get('filter_text_replaced', True),
+                                                       line_feed_sep="\n",
+                                                       include_change_type_prefix=False)
+
+            watch.save_last_fetched_before_filters(text_content_before_ignored_filter)
+
+            if not rendered_diff and stripped_text_from_html:
+                # We had some content, but no differences were found
+                # Store our new file as the MD5 so it will trigger in the future
+                c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest()
+                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
+            else:
+                stripped_text_from_html = rendered_diff
+
        # Treat pages with no renderable text content as a change? No by default
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
-            raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)
+            raise fetchers.exceptions.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)

        # We rely on the actual text in the html output.. many sites have random script vars etc,
        # in the future we'll implement other mechanisms.
@@ -331,6 +367,7 @@ class perform_site_check():
            blocked = True
            # Filter and trigger works the same, so reuse it
            # It should return the line numbers that match
+            # Unblock flow if the trigger was found (some text remained after stripping what didn't match)
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
                                                  wordlist=trigger_text,
                                                  mode="line numbers")
@@ -0,0 +1,97 @@
+/**
+ * Scan the visible leaf elements of the page for stock-related text.
+ *
+ * Returns 'Possibly in stock' when an in-stock pattern is found or when no
+ * out-of-stock phrase is found at all; otherwise returns the lower-cased text
+ * of the element that contains an out-of-stock phrase.
+ */
+function isItemInStock() {
+  // @todo Pass these in so the same list can be used in non-JS fetchers
+  const outOfStockTexts = [
+    '0 in stock',
+    'agotado',
+    'artikel zurzeit vergriffen',
+    'as soon as stock is available',
+    'available for back order',
+    'backordered',
+    'brak na stanie',
+    'brak w magazynie',
+    'coming soon',
+    'currently unavailable',
+    'en rupture de stock',
+    'item is no longer available',
+    'message if back in stock',
+    'nachricht bei',
+    'nicht auf lager',
+    'nicht lieferbar',
+    'nicht zur verfügung',
+    'no disponible temporalmente',
+    'no longer in stock',
+    'not available',
+    'not in stock',
+    'notify me when available',
+    'não estamos a aceitar encomendas',
+    'out of stock',
+    'out-of-stock',
+    'produkt niedostępny',
+    'sold out',
+    'temporarily out of stock',
+    'temporarily unavailable',
+    'we do not currently have an estimate of when this product will be back in stock.',
+    'zur zeit nicht an lager',
+  ];
+
+
+  // Patterns that mean the item IS in stock even though an out-of-stock phrase
+  // appears inside the text, e.g. "10 in stock" contains "0 in stock".
+  // The count must start with 1-9 so that a plain "0 in stock" is NOT treated as in stock.
+  const negateOutOfStockRegexs = [
+      '[1-9][0-9]* in stock'
+  ]
+  // One compiled regex per pattern; no 'g' flag, so .test() keeps no state between calls
+  const negateOutOfStockRegexs_r = negateOutOfStockRegexs.map(pattern => new RegExp(pattern));
+
+  // Lower-cased text (or value, for <input>) of an element, or '' when the element is not rendered
+  function visibleTextOf(element) {
+    const isVisible = element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0;
+    if (!isVisible) {
+      return '';
+    }
+    if (element.tagName.toLowerCase() === "input") {
+      return element.value.toLowerCase();
+    }
+    return element.textContent.toLowerCase();
+  }
+
+  const elementsWithZeroChildren = Array.from(document.getElementsByTagName('*')).filter(element => element.children.length === 0);
+
+  // Non-empty texts of the visible leaf elements, last element first (same scan order for both passes)
+  const visibleTexts = elementsWithZeroChildren
+    .map(visibleTextOf)
+    .filter(text => text.length > 0)
+    .reverse();
+
+  // REGEXS THAT REALLY MEAN IT'S IN STOCK
+  for (const elementText of visibleTexts) {
+    if (negateOutOfStockRegexs_r.some(regex => regex.test(elementText))) {
+      return 'Possibly in stock';
+    }
+  }
+
+  // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
+  for (const elementText of visibleTexts) {
+    for (const outOfStockText of outOfStockTexts) {
+      if (elementText.includes(outOfStockText)) {
+        return elementText; // item is out of stock
+      }
+    }
+  }
+
+  return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
+}
+
+// returns the element text that makes it think it's out of stock
+return isItemInStock();
@@ -28,3 +28,11 @@ pytest tests/test_notification.py
 # Re-run with HIDE_REFERER set - could affect login
 export HIDE_REFERER=True
 pytest tests/test_access_control.py
+
+# Re-run a few tests that will trigger brotli based storage
+export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
+pytest tests/test_access_control.py
+pytest tests/test_notification.py
+pytest tests/test_backend.py
+pytest tests/test_rss.py
+pytest tests/test_unique_lines.py
@@ -241,6 +241,10 @@ body:before {
  font-size: 85%;
 }

+.button-xsmall {
+  font-size: 70%;
+}
+
 .fetch-error {
  padding-top: 1em;
  font-size: 80%;
@@ -889,6 +893,21 @@ body.full-width {
      font-size: .875em;
    }
  }
+  .text-filtering {
+    h3 {
+      margin-top: 0;
+    }
+    border: 1px solid #ccc;
+    padding: 1rem;
+    border-radius: 5px;
+    margin-bottom: 1rem;
+    fieldset:last-of-type {
+      padding-bottom: 0;
+      .pure-control-group {
+        padding-bottom: 0;
+      }
+    }
+  }
 }

 ul {
@@ -1044,3 +1063,30 @@ ul {
  vertical-align: middle;
 }

+
+#quick-watch-processor-type {
+  color: #fff;
+  ul {
+    padding: 0.3rem;
+
+    li {
+      list-style: none;
+      font-size: 0.8rem;
+    }
+  }
+
+}
+
+.restock-label {
+  &.in-stock {
+    background-color: var(--color-background-button-green);
+    color: #fff;
+  }
+  &.not-in-stock {
+    background-color: var(--color-background-button-cancel);
+    color: #777;
+  }
+  padding: 3px;
+  border-radius: 3px;
+  white-space: nowrap;
+}
@@ -432,6 +432,9 @@ body:before {
 .button-small {
  font-size: 85%; }

+.button-xsmall {
+  font-size: 70%; }
+
 .fetch-error {
  padding-top: 1em;
  font-size: 80%;
@@ -869,6 +872,17 @@ body.full-width .edit-form {
    color: var(--color-text-input-description); }
    .edit-form .pure-form-message-inline code {
      font-size: .875em; }
+  .edit-form .text-filtering {
+    border: 1px solid #ccc;
+    padding: 1rem;
+    border-radius: 5px;
+    margin-bottom: 1rem; }
+    .edit-form .text-filtering h3 {
+      margin-top: 0; }
+    .edit-form .text-filtering fieldset:last-of-type {
+      padding-bottom: 0; }
+      .edit-form .text-filtering fieldset:last-of-type .pure-control-group {
+        padding-bottom: 0; }

 ul {
  padding-left: 1em;
@@ -980,3 +994,22 @@ ul {
  display: inline-block;
  height: 0.8rem;
  vertical-align: middle; }
+
+#quick-watch-processor-type {
+  color: #fff; }
+  #quick-watch-processor-type ul {
+    padding: 0.3rem; }
+    #quick-watch-processor-type ul li {
+      list-style: none;
+      font-size: 0.8rem; }
+
+.restock-label {
+  padding: 3px;
+  border-radius: 3px;
+  white-space: nowrap; }
+  .restock-label.in-stock {
+    background-color: var(--color-background-button-green);
+    color: #fff; }
+  .restock-label.not-in-stock {
+    background-color: var(--color-background-button-cancel);
+    color: #777; }
@@ -287,6 +287,7 @@ class ChangeDetectionStore:
                    'method',
                    'paused',
                    'previous_md5',
+                    'processor',
                    'subtractive_selectors',
                    'tag',
                    'text_should_not_be_present',
@@ -360,11 +361,6 @@ class ChangeDetectionStore:
            f.write(screenshot)
            f.close()

-        # Make a JPEG that's used in notifications (due to being a smaller size) available
-        from PIL import Image
-        im1 = Image.open(target_path)
-        im1.convert('RGB').save(target_path.replace('.png','.jpg'), quality=int(os.getenv("NOTIFICATION_SCREENSHOT_JPG_QUALITY", 75)))
-

    def save_error_text(self, watch_uuid, contents):
        if not self.data['watching'].get(watch_uuid):
@@ -17,14 +17,15 @@
                                <li><code>tgram://</code> bots cant send messages to other bots, so you should specify chat ID of non-bot user.</li>
                                <li><code>tgram://</code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
                                <li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>)</li>
+                                  <li>Accepts the <code>{{ '{{token}}' }}</code> placeholders listed below</li>
                              </ul>
                            </div>
                            <div class="notifications-wrapper">
-                              <a id="send-test-notification" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Send test notification</a>
+                              <a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a>
                            {% if emailprefix %}
-                              <a id="add-email-helper" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Add email</a>
+                              <a id="add-email-helper" class="pure-button button-secondary button-xsmall" >Add email</a>
                            {% endif %}
-                              <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Notification debug logs</a>
+                              <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" >Notification debug logs</a>
                            </div>
                        </div>
                        <div id="notification-customisation" class="pure-control-group">
@@ -55,48 +56,66 @@
                                    </thead>
                                    <tbody>
                                    <tr>
-                                        <td><code>{{ '{{ base_url }}' }}</code></td>
+                                        <td><code>{{ '{{base_url}}' }}</code></td>
                                        <td>The URL of the changedetection.io instance you are running.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ watch_url }}' }}</code></td>
+                                        <td><code>{{ '{{watch_url}}' }}</code></td>
                                        <td>The URL being watched.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ watch_uuid }}' }}</code></td>
+                                        <td><code>{{ '{{watch_uuid}}' }}</code></td>
                                        <td>The UUID of the watch.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ watch_title }}' }}</code></td>
+                                        <td><code>{{ '{{watch_title}}' }}</code></td>
                                        <td>The title of the watch.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ watch_tag }}' }}</code></td>
+                                        <td><code>{{ '{{watch_tag}}' }}</code></td>
                                        <td>The watch label / tag</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ preview_url }}' }}</code></td>
+                                        <td><code>{{ '{{preview_url}}' }}</code></td>
                                        <td>The URL of the preview page generated by changedetection.io.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ diff_url }}' }}</code></td>
-                                        <td>The diff output - differences only</td>
+                                        <td><code>{{ '{{diff_url}}' }}</code></td>
+                                        <td>The URL of the diff output for the watch.</td>
+                                    </tr>
+									<tr>
+                                        <td><code>{{ '{{diff}}' }}</code></td>
+                                        <td>The diff output - only changes, additions, and removals</td>
+                                    </tr>
+									<tr>
+                                        <td><code>{{ '{{diff_added}}' }}</code></td>
+                                        <td>The diff output - only changes and additions</td>
+                                    </tr>
+									<tr>
+                                        <td><code>{{ '{{diff_removed}}' }}</code></td>
+                                        <td>The diff output - only changes and removals</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ diff_full }}' }}</code></td>
+                                        <td><code>{{ '{{diff_full}}' }}</code></td>
                                        <td>The diff output - full difference output</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ current_snapshot }}' }}</code></td>
+                                        <td><code>{{ '{{current_snapshot}}' }}</code></td>
                                        <td>The current snapshot value, useful when combined with JSON or CSS filters
                                        </td>
                                    </tr>
+                                    <tr>
+                                        <td><code>{{ '{{triggered_text}}' }}</code></td>
+                                        <td>Text that tripped the trigger from filters</td>
+                                    </tr>
                                    </tbody>
                                </table>
                                <div class="pure-form-message-inline">
                                    <br>
-                                    URLs generated by changedetection.io (such as <code>{{ '{{ diff_url }}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br/>
+                                    URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
                                    Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
+									<br>
+									Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removal%7D%7D-notification-tokens">More Here</a> <br>
                                </div>
                            </div>
                        </div>
@@ -124,12 +124,12 @@
            <div class="pure-control-group">
                {{ render_field(extract_form.extract_regex) }}
                <span class="pure-form-message-inline">
-                    A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br/>
+                    A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>

                    <p>
-                        For example, to extract only the numbers from text &dash;</br>
-                        <strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code></br>
-                        <strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br/>
+                        For example, to extract only the numbers from text &dash;<br>
+                        <strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
+                        <strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
                    </p>
                    <p>
                        <a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
@@ -154,4 +154,4 @@
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>


-{% endblock %}
+{% endblock %}
@@ -34,8 +34,15 @@
            {% if playwright_enabled %}
            <li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
            {% endif %}
+
+            {% if watch['processor'] == 'text_json_diff' %}
            <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
            <li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
+            {% endif %}
+
+            {% if watch['processor'] == 'restock_diff' %}
+            <li class="tab"><a href="#restock">Restock Detection</a></li>
+            {% endif %}
            <li class="tab"><a href="#notifications">Notifications</a></li>
        </ul>
    </div>
@@ -49,8 +56,18 @@
                <fieldset>
                    <div class="pure-control-group">
                        {{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
-                        <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
-                        <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
+                        <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
+                        <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
+                        <span class="pure-form-message-inline">
+                        {% if watch['processor'] == 'text_json_diff' %}
+                            Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br>
+                          <a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a>
+                        {% else %}
+                        Current mode: <strong>Re-stock detection.</strong><br>
+                          <a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a>
+                        {% endif %}
+                        </span>
+
                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.title, class="m-d") }}
@@ -106,10 +123,10 @@
                        {{ render_field(form.webdriver_delay) }}
                        <div class="pure-form-message-inline">
                            <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
-                            <br/>
+                            <br>
                            This will wait <i>n</i> seconds before extracting the text.
                            {% if using_global_webdriver_wait %}
-                            <br/><strong>Using the current global default settings</strong>
+                            <br><strong>Using the current global default settings</strong>
                            {% endif %}
                        </div>
                    </div>
@@ -214,9 +231,10 @@ User-Agent: wonderbra 1.0") }}
                </fieldset>
            </div>

+            {% if watch['processor'] == 'text_json_diff' %}
            <div class="tab-pane-inner" id="filters-and-triggers">
                    <div class="pure-control-group">
-                            <strong>Pro-tips:</strong><br/>
+                            <strong>Pro-tips:</strong><br>
                            <ul>
                                <li>
                                    Use the preview page to see your filters and triggers highlighted.
@@ -226,12 +244,6 @@ User-Agent: wonderbra 1.0") }}
                                </li>
                            </ul>
                    </div>
-                    <fieldset>
-                        <div class="pure-control-group">
-                            {{ render_checkbox_field(form.check_unique_lines) }}
-                            <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
-                        </div>
-                    </fieldset>
                    <div class="pure-control-group">
                        {% set field = render_field(form.include_filters,
                            rows=5,
@@ -241,9 +253,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                        %}
                        {{ field }}
                        {% if '/text()' in  field %}
-                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br/>
+                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
                        {% endif %}
-                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
+                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>

                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
@@ -266,40 +278,42 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                            </li>
                    </ul>
                    Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
-                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
+                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
                </span>
                    </div>
-                    <div class="pure-control-group">
-                      {{ render_field(form.subtractive_selectors, rows=5, placeholder="header
+                <fieldset class="pure-control-group">
+                    {{ render_field(form.subtractive_selectors, rows=5, placeholder="header
 footer
 nav
 .stockticker") }}
-                      <span class="pure-form-message-inline">
+                    <span class="pure-form-message-inline">
                        <ul>
                          <li> Remove HTML element(s) by CSS selector before text conversion. </li>
                          <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
                        </ul>
                      </span>
-                    </div>
-                <fieldset class="pure-group">
-                    {{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
-/some.regex\d{2}/ for case-INsensitive regex
-                    ") }}
-                    <span class="pure-form-message-inline">
-                        <ul>
-                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
-                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
-                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
-                            <li>Use the preview/show current tab to see ignores</li>
-                        </ul>
-                </span>
+                </fieldset>
+                <div class="text-filtering">
+                <fieldset class="pure-group" id="text-filtering-type-options">
+                    <h3>Text filtering</h3>
+                        Limit trigger/ignore/block/extract to:<br>
+                        {{ render_checkbox_field(form.filter_text_added) }}
+                        {{ render_checkbox_field(form.filter_text_replaced) }}
+                        {{ render_checkbox_field(form.filter_text_removed) }}
+                    <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span>
+                    <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
+                    <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
+                </fieldset>

-            </fieldset>
+                <fieldset class="pure-control-group">
+                    {{ render_checkbox_field(form.check_unique_lines) }}
+                    <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
+                </fieldset>
                <fieldset>
                    <div class="pure-control-group">
                        {{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line
 /some.regex\d{2}/ for case-INsensitive regex
-                    ") }}
+") }}
                        <span class="pure-form-message-inline">
                    <ul>
                        <li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
@@ -310,6 +324,21 @@ nav
                        </span>
                    </div>
                </fieldset>
+                <fieldset class="pure-group">
+                    {{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
+/some.regex\d{2}/ for case-INsensitive regex
+") }}
+                    <span class="pure-form-message-inline">
+                        <ul>
+                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
+                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
+                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
+                            <li>Use the preview/show current tab to see ignores</li>
+                        </ul>
+                </span>
+
+                </fieldset>
+
                <fieldset>
                    <div class="pure-control-group">
                        {{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
@@ -334,7 +363,7 @@ Unavailable") }}
                        <li>Extracts text in the final output (line by line) after other filters using regular expressions;
                            <ul>
                                <li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
-                                <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li>
+                                <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
                                <li>Keyword example &dash; example <code>Out of stock</code></li>
                                <li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
                            </ul>
@@ -344,8 +373,22 @@ Unavailable") }}
                        </span>
                    </div>
                </fieldset>
+                </div>
            </div>
+            {% endif %}

+            {% if watch['processor'] == 'restock_diff' %}
+            <div class="tab-pane-inner" id="restock">
+                    <fieldset>
+                        <div class="pure-control-group">
+                            {{ render_checkbox_field(form.in_stock_only) }}
+                            <span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span>
+                        </div>
+                    </fieldset>
+            </div>
+            {% endif %}
+
+            {% if watch['processor'] == 'text_json_diff' %}
            <div class="tab-pane-inner visual-selector-ui" id="visualselector">
                <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">

@@ -353,7 +396,7 @@ Unavailable") }}
                    <div class="pure-control-group">
                        {% if visualselector_enabled %}
                            <span class="pure-form-message-inline">
-                                The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br/><br/>
+                                The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br><br>
                            </span>

                            <div id="selector-header">
@@ -378,6 +421,7 @@ Unavailable") }}
                    </div>
                </fieldset>
            </div>
+            {% endif %}

            <div id="actions">
                <div class="pure-control-group">
@@ -1,5 +1,6 @@
 {% extends 'base.html' %}
 {% block content %}
+{% from '_helpers.jinja' import render_field %}
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <div class="edit-form monospaced-textarea">

@@ -14,7 +15,6 @@
        <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
            <div class="tab-pane-inner" id="url-list">
-                <fieldset class="pure-group">
                    <legend>
                        Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
                        (,):
@@ -23,7 +23,7 @@
                        <br>
                        URLs which do not pass validation will stay in the textarea.
                    </legend>
-
+                {{ render_field(form.processor, class="processor") }}

                    <textarea name="urls" class="pure-input-1-2" placeholder="https://"
                              style="width: 100%;
@@ -31,22 +31,24 @@
                                white-space: pre;
                                overflow-wrap: normal;
                                overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
-                </fieldset>

+<div id="quick-watch-processor-type">
+
+                    </div>

            </div>

            <div class="tab-pane-inner" id="distill-io">


-                <fieldset class="pure-group">
+
                    <legend>
-                        Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.</br>
+                        Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
                        This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
-                        <br/>
+                        <br>
                        <p>
-                        How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br/>
-                        Be sure to set your default fetcher to Chrome if required.</br>
+                        How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
+                        Be sure to set your default fetcher to Chrome if required.<br>
                        </p>
                    </legend>

@@ -75,7 +77,7 @@
    ]
 }
 " rows="25">{{ original_distill_json }}</textarea>
-                </fieldset>
+
            </div>
            <button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
        </form>
@@ -54,7 +54,7 @@
         <div class="tip">
             For now, Differences are performed on text, not graphically, only the latest screenshot is available.
         </div>
-         </br>
+         <br>
         {% if is_html_webdriver %}
           {% if screenshot %}
             <div class="snapshot-age">{{watch.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
@@ -67,4 +67,4 @@
         {% endif %}
     </div>
 </div>
-{% endblock %}
+{% endblock %}
@@ -40,7 +40,7 @@
                    <div class="pure-control-group">
                        {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
                        <span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
-                            <br/>
+                            <br>
                        Set to <strong>0</strong> to disable
                        </span>
                    </div>
@@ -66,7 +66,7 @@
                        {{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
                        class="m-d") }}
                        <span class="pure-form-message-inline">
-                            Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br/>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
+                            Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
                            <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
                        </span>
                    </div>
@@ -105,13 +105,13 @@
                        <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
                        <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
                    </span>
-                    <br/>
+                    <br>
                    Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
                </div>
                <fieldset class="pure-group" id="webdriver-override-options">
                    <div class="pure-form-message-inline">
                        <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
-                        <br/>
+                        <br>
                        This will wait <i>n</i> seconds before extracting the text.
                    </div>
                    <div class="pure-control-group">
@@ -124,14 +124,14 @@

                    <fieldset class="pure-group">
                    {{ render_checkbox_field(form.application.form.ignore_whitespace) }}
-                    <span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br/>
+                    <span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br>
                    <i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
                    </span>
                    </fieldset>
                <fieldset class="pure-group">
                    {{ render_checkbox_field(form.application.form.render_anchor_tag_content) }}
                    <span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code>
-                        <br/>
+                        <br>
                    <i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc.
                    </span>
                    </fieldset>
@@ -151,7 +151,7 @@ nav
                    {{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
 /some.regex\d{2}/ for case-INsensitive regex
                    ") }}
-                    <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br/>
+                    <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
                    <span class="pure-form-message-inline">
                        <ul>
                            <li>Note: This is applied globally in addition to the per-watch rules.</li>
@@ -170,8 +170,8 @@ nav

                <div class="pure-control-group">
                    {{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
-                    <div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br/>
-                    <div class="pure-form-message-inline"><br/>API Key <span id="api-key">{{api_key}}</span>
+                    <div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br>
+                    <div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span>
                        <span style="display:none;" id="api-key-copy" >copy</span>
                    </div>
                </div>
@@ -181,7 +181,7 @@ nav
                <p><strong>Tip</strong>: You can connect to websites using <a href="https://brightdata.grsm.io/n0r16zf7eivq">BrightData</a> proxies, their service <strong>WebUnlocker</strong> will solve most CAPTCHAs, whilst their <strong>Residential Proxies</strong> may help to avoid CAPTCHA altogether. </p>
                <p>It may be easier to try <strong>WebUnlocker</strong> first, WebUnlocker also supports country selection.</p>
                <p>
-                    When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br/>
+                    When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br>
                    The Proxy URL with BrightData should start with <code>http://brd-customer...</code>
                </p>

@@ -21,6 +21,10 @@
                    {{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
                </div>
            </div>
+            <div id="quick-watch-processor-type">
+                {{ render_simple_field(form.processor, title="Edit first then Watch") }}
+            </div>
+
        </fieldset>
        <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
    </form>
@@ -28,12 +32,12 @@
    <form class="pure-form" action="{{ url_for('form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
    <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
    <div id="checkbox-operations">
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="pause">Pause</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="unpause">UnPause</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="mute">Mute</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="unmute">UnMute</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="recheck">Recheck</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button>
+        <button class="pure-button button-secondary button-xsmall"  name="op" value="pause">Pause</button>
+        <button class="pure-button button-secondary button-xsmall"  name="op" value="unpause">UnPause</button>
+        <button class="pure-button button-secondary button-xsmall"  name="op" value="mute">Mute</button>
+        <button class="pure-button button-secondary button-xsmall"  name="op" value="unmute">UnMute</button>
+        <button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
+        <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
        <button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
    </div>
    <div>
@@ -72,7 +76,7 @@
              {% if not ( loop.index >= 3 and loop.index <=4) %}{% continue %}{% endif %} -->
             #}
            <tr id="{{ watch.uuid }}"
-                class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
+                class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
                {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
                {% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
                {% if watch.paused is defined and watch.paused != False %}paused{% endif %}
@@ -113,12 +117,26 @@
                    {% if watch.last_notification_error is defined and watch.last_notification_error != False %}
                    <div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
                    {% endif %}
-                    {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  %}
-                    <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
+
+                    {% if watch['processor'] == 'text_json_diff'  %}
+                        {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  %}
+                        <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
+                        {% endif %}
+                        {% if watch['track_ldjson_price_data'] == 'accepted' %}
+                        <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon"/> Price</span>
+                        {% endif %}
                    {% endif %}
-                    {% if watch['track_ldjson_price_data'] == 'accepted' %}
-                    <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon"/> Price</span>
+
+                    {% if watch['processor'] == 'restock_diff'  %}
+                    <span class="restock-label {{'in-stock' if watch['in_stock'] else 'not-in-stock' }}" title="detecting restock conditions">
+                        <!-- maybe some object watch['processor'][restock_diff] or.. -->
+                        {% if watch['last_checked'] %}
+                            {% if watch['in_stock'] %} In stock {% else %} Not in stock {% endif %}
+                        {% else %}
+                            Not yet checked
+                        {% endif %}</span>
                    {% endif %}
+
                    {% if not active_tag %}
                    <span class="watch-tag-list">{{ watch.tag}}</span>
                    {% endif %}
@@ -0,0 +1,2 @@
+"""Tests for the app."""
+
@@ -0,0 +1,3 @@
+#!/usr/bin/python3
+
+from .. import conftest
@@ -0,0 +1,106 @@
+#!/usr/bin/python3
+import os
+import time
+from flask import url_for
+from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from changedetectionio.notification import (
+    default_notification_body,
+    default_notification_format,
+    default_notification_title,
+    valid_notification_formats,
+)
+
+
+def set_original_response():
+    """Write the page whose 'sametext' div reads 'Out of stock' to test-datastore/endpoint-content.txt."""
+    out_of_stock_page = """<html>
+       <body>
+     Some initial text<br>
+     <p>Which is across multiple lines</p>
+     <br>
+     So let's see what happens.  <br>
+     <div>price: $10.99</div>
+     <div id="sametext">Out of stock</div>
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
+        endpoint_file.write(out_of_stock_page)
+
+
+
+def set_back_in_stock_response():
+    """Write the page whose 'sametext' div reads 'Available!' to test-datastore/endpoint-content.txt."""
+    in_stock_page = """<html>
+       <body>
+     Some initial text<br>
+     <p>Which is across multiple lines</p>
+     <br>
+     So let's see what happens.  <br>
+     <div>price: $10.99</div>
+     <div id="sametext">Available!</div>
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
+        endpoint_file.write(in_stock_page)
+
+# Add a watch using the 'restock_diff' processor, check the in-stock / not-in-stock label on the index page,
+# and check that a notification file only appears on the OUT OF STOCK -> IN STOCK transition
+def test_restock_detection(client, live_server):
+
+    # Start with the page that says "Out of stock"
+    set_original_response()
+    #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
+
+    time.sleep(1)
+    live_server_setup(live_server)
+    #####################
+    # Notification URL for the test endpoint: the host is rewritten to 'changedet' and the scheme becomes json://
+    notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')
+
+
+    #####################
+    # Set the global notification settings and the 'html_webdriver' fetch backend.
+    # The watch added below sets no notification of its own - presumably it falls back to these details (TODO confirm)
+    res = client.post(
+        url_for("settings_page"),
+        data={"application-notification_urls": notification_url,
+              "application-notification_title": "fallback-title "+default_notification_title,
+              "application-notification_body": "fallback-body "+default_notification_body,
+              "application-notification_format": default_notification_format,
+              "requests-time_between_check-minutes": 180,
+              'application-fetch_backend': "html_webdriver"},
+        follow_redirects=True
+    )
+    # Rewrite the host of the test URL, because the docker container (playwright/selenium) won't be able to connect to our usual test url
+    test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet')
+
+
+    # Add the watch through the quick-add form, selecting the restock_diff processor
+    client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": test_url, "tag": '', 'processor': 'restock_diff'},
+        follow_redirects=True
+    )
+
+    # Is it correctly shown as NOT in stock?
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'not-in-stock' in res.data
+
+    # Is it correctly shown as in stock?
+    set_back_in_stock_response()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'not-in-stock' not in res.data
+
+    # We should have a notification
+    time.sleep(2)
+    assert os.path.isfile("test-datastore/notification.txt")
+    # Remove it so the next check starts without a notification file
+    os.unlink("test-datastore/notification.txt")
+
+    # Default behaviour is to only fire notification when it goes OUT OF STOCK -> IN STOCK
+    # So here there should be no file, because we go IN STOCK -> OUT OF STOCK
+    set_original_response()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    assert not os.path.isfile("test-datastore/notification.txt")
@@ -0,0 +1,176 @@
+#!/usr/bin/python3
+
+import time
+from flask import url_for
+from .util import live_server_setup
+from changedetectionio import html_tools
+
+
+def set_original(excluding=None, add_line=None):
+    """Write the base test page to test-datastore/endpoint-content.txt.
+
+    :param excluding: if set, every line of the page that contains this substring is left out
+    :param add_line: if set, this text is inserted at index 5 of the page's lines
+                     (directly before 'The golden line'); applied before ``excluding``
+    """
+    test_return_data = """<html>
+     <body>
+     <p>Some initial text</p>
+     <p>So let's see what happens.</p>
+     <p>and a new line!</p>
+     <p>The golden line</p>
+     <p>A BREAK TO MAKE THE TOP LINE STAY AS "REMOVED" OR IT WILL GET COUNTED AS "CHANGED INTO"</p>
+     <p>Something irrelevant</p>          
+     </body>
+     </html>
+    """
+
+    if add_line:
+        page_lines = test_return_data.splitlines()
+        page_lines.insert(5, add_line)
+        test_return_data = "\n".join(page_lines)
+
+    if excluding:
+        # Keep only the lines that do not contain the excluded text, each terminated by a newline
+        test_return_data = "".join(
+            f"{line}\n" for line in test_return_data.splitlines() if excluding not in line
+        )
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+
+# Runs the shared live_server_setup() helper from .util before the tests below
+# (presumably it registers the test endpoints on the live server - see util.py to confirm)
+def test_setup(client, live_server):
+    live_server_setup(live_server)
+
+# With trigger text 'The golden line' and 'filter_text_removed' enabled, the watch should only become
+# 'unviewed' when that line is REMOVED from the page - not when another line goes or when it comes back
+def test_check_removed_line_contains_trigger(client, live_server):
+    sleep_time_for_fetch_thread = 3
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+    set_original()
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)
+
+    # Go to the edit page and set the trigger text,
+    # with 'filter_text_removed' switched on
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"trigger_text": 'The golden line',
+              "url": test_url,
+              'fetch_backend': "html_requests",
+              'filter_text_removed': 'y'},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    time.sleep(sleep_time_for_fetch_thread)
+    set_original(excluding='Something irrelevant')
+
+    # Removing a line that's not the trigger should not trigger anything
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
+    # The trigger line is REMOVED,  this should trigger
+    set_original(excluding='The golden line')
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+
+    # Now add it back, and we should not get a trigger
+    client.get(url_for("mark_all_viewed"), follow_redirects=True)
+    set_original(excluding=None)
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
+    # Remove it again, and we should get a trigger
+    set_original(excluding='The golden line')
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+    # Clean up: delete all watches
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
+
+# With trigger text 'Oh yes please' and 'filter_text_added' enabled, the watch should become 'unviewed'
+# when that line is ADDED, and the notification should carry the triggered text
+def test_check_add_line_contains_trigger(client, live_server):
+
+    sleep_time_for_fetch_thread = 3
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+    # Notification goes to the test endpoint using the post:// scheme, with the watch URL as a query parameter
+    test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://') + "?xxx={{ watch_url }}"
+
+    res = client.post(
+        url_for("settings_page"),
+        data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
+              "application-notification_body": 'triggered text was -{{triggered_text}}-',
+              # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
+              "application-notification_urls": test_notification_url,
+              "application-minutes_between_check": 180,
+              "application-fetch_backend": "html_requests"
+              },
+        follow_redirects=True
+    )
+    assert b'Settings updated' in res.data
+
+    set_original()
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)
+
+    # Go to the edit page and set the trigger text,
+    # with 'filter_text_added' switched on and 'filter_text_removed' left empty
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"trigger_text": 'Oh yes please',
+              "url": test_url,
+              'fetch_backend': "html_requests",
+              'filter_text_removed': '',
+              'filter_text_added': 'y'},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    time.sleep(sleep_time_for_fetch_thread)
+    set_original(excluding='Something irrelevant')
+
+    # A change that's not the trigger line should not trigger anything
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
+    # The trigger line is ADDED,  this should trigger
+    set_original(add_line='<p>Oh yes please</p>')
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+    # The notification body template above wraps {{triggered_text}} in dashes, so the added line should show up like this
+    with open("test-datastore/notification.txt", 'r') as f:
+        response= f.read()
+        assert '-Oh yes please-' in response
+
+
+    # Clean up: delete all watches
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
@@ -11,10 +11,10 @@ import uuid
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div id="changetext">Some text that will change</div>
     </body>
@@ -29,10 +29,10 @@ def set_original_response():
 def set_modified_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div id="changetext">Some text that changes</div>
     </body>
@@ -7,10 +7,10 @@ from .util import live_server_setup, extract_UUID_from_client, extract_api_key_f
 def set_response_with_ldjson():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>
     <script type="application/ld+json">
@@ -61,10 +61,10 @@ def set_response_with_ldjson():
 def set_response_without_ldjson():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>     
     </body>
@@ -143,4 +143,4 @@ def test_check_ldjson_price_autodetect(client, live_server):
    assert b'ldjson-price-track-offer' not in res.data
    
    ##########################################################################################
-    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -11,7 +11,7 @@ sleep_time_for_fetch_thread = 3
 # Basic test to check inscriptus is not adding return line chars, basically works etc
 def test_inscriptus():
    from inscriptis import get_text
-    html_content = "<html><body>test!<br/>ok man</body></html>"
+    html_content = "<html><body>test!<br>ok man</body></html>"
    stripped_text_from_html = get_text(html_content)
    assert stripped_text_from_html == 'test!\nok man'

@@ -82,7 +82,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
    assert b'<rss' in res.data

    # re #16 should have the diff in here too
-    assert b'(into   ) which has this one new line' in res.data
+    assert b'(into) which has this one new line' in res.data
    assert b'CDATA' in res.data

    assert expected_url.encode('utf-8') in res.data
@@ -8,10 +8,10 @@ from changedetectionio import html_tools
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -24,10 +24,10 @@ def set_original_ignore_response():
 def set_modified_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <p>new ignore stuff</p>
     <p>out of stock</p>
     <p>blah</p>
@@ -44,11 +44,11 @@ def set_modified_original_ignore_response():
 def set_modified_response_minus_block_text():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
     <p>now on sale $2/p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <p>new ignore stuff</p>
     <p>blah</p>
     </body>
@@ -87,7 +87,10 @@ def test_check_block_changedetection_text_NOT_present(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"text_should_not_be_present": ignore_text, "url": test_url, 'fetch_backend': "html_requests"},
+        data={"text_should_not_be_present": ignore_text,
+              "url": test_url,
+              'fetch_backend': "html_requests"
+              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -129,7 +132,6 @@ def test_check_block_changedetection_text_NOT_present(client, live_server):
    set_modified_response_minus_block_text()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
-
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

@@ -12,10 +12,10 @@ def test_setup(live_server):
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div id="changetext">Some text that will change</div>
     </body>
@@ -29,10 +29,10 @@ def set_original_response():
 def set_modified_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div id="changetext">Some text that changes</div>
     </body>
@@ -25,10 +25,10 @@ def set_original_response():
    </ul>
    </nav>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
    <div id="changetext">Some text that will change</div>
     </body>
    <footer>
@@ -54,10 +54,10 @@ def set_modified_response():
    </ul>
    </nav>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
    <div id="changetext">Some text that changes</div>
     </body>
    <footer>
@@ -71,7 +71,6 @@ def set_modified_response():


 def test_element_removal_output():
-    from changedetectionio import fetch_site_status
    from inscriptis import get_text

    # Check text with sub-parts renders correctly
@@ -85,7 +84,7 @@ def test_element_removal_output():
    </ul>
    </nav>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>across multiple lines</p>
     <div id="changetext">Some text that changes</div>
     </body>
@@ -59,6 +59,8 @@ def test_http_error_handler(client, live_server):
    _runner_test_http_errors(client, live_server, 404, 'Page not found')
    _runner_test_http_errors(client, live_server, 500, '(Internal server Error) received')
    _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400')
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

 # Just to be sure error text is properly handled
 def test_DNS_errors(client, live_server):
@@ -81,4 +83,48 @@ def test_DNS_errors(client, live_server):
    assert found_name_resolution_error
    # Should always record that we tried
    assert bytes("just now".encode('utf-8')) in res.data
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

+# Re #1513 - an error shown for a watch (here a DNS resolution failure) should disappear from the index page
+# once the watch is edited to a URL that can be fetched
+def test_low_level_errors_clear_correctly(client, live_server):
+    #live_server_setup(live_server)
+    # Give the endpoint time to spin up
+    time.sleep(1)
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("<html><body><div id=here>Hello world</div></body></html>")
+
+    # The working URL that the watch is switched to later on
+    test_url = url_for('test_endpoint', _external=True)
+
+    # Import a hostname that is not expected to resolve
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": "https://dfkjasdkfjaidjfsdajfksdajfksdjfDOESNTEXIST.com"},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    time.sleep(2)
+
+    # We should see the DNS error
+    res = client.get(url_for("index"))
+    found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
+    assert found_name_resolution_error
+
+    # Update with what should work
+    client.post(
+        url_for("edit_page", uuid="first"),
+        data={
+            "url": test_url,
+            "fetch_backend": "html_requests"},
+        follow_redirects=True
+    )
+
+    # Now the error should be gone
+    time.sleep(2)
+    res = client.get(url_for("index"))
+    found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
+    assert not found_name_resolution_error
+
+    # Clean up: delete all watches
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
@@ -10,10 +10,10 @@ from ..html_tools import *
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>     
     </body>
@@ -28,12 +28,12 @@ def set_original_response():
 def set_modified_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
-     <div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/>  2000 online )</div>
+     <div class="changetext">Some text that did change ( 1000 online <br> 80 guests<br>  2000 online )</div>
     <div class="changetext">SomeCase insensitive 3456</div>
     </body>
     </html>
@@ -49,8 +49,8 @@ def set_multiline_response():
    test_return_data = """<html>
       <body>
     
-     <p>Something <br/>
-        across 6 billion multiple<br/>
+     <p>Something <br>
+        across 6 billion multiple<br>
        lines
     </p>
     
@@ -11,10 +11,10 @@ from changedetectionio.model import App
 def set_response_without_filter():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="nope-doesnt-exist">Some text thats the same</div>     
     </body>
     </html>
@@ -28,10 +28,10 @@ def set_response_without_filter():
 def set_response_with_filter():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div class="ticket-available">Ticket now on sale!</div>     
     </body>
     </html>
@@ -8,10 +8,10 @@ from changedetectionio.model import App
 def set_response_with_filter():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="nope-doesnt-exist">Some text thats the same</div>     
     </body>
     </html>
@@ -145,4 +145,4 @@ def test_check_xpath_filter_failure_notification(client, live_server):
    time.sleep(1)
    run_filter_test(client, '//*[@id="nope-doesnt-exist"]')

-# Test that notification is never sent
+# Test that notification is never sent
@@ -6,11 +6,11 @@ from ..html_tools import html_to_text
 def test_html_to_text_func():
    test_html = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
     <a href="/first_link"> More Text </a>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <a href="second_link.com"> Even More Text </a>
     </body>
     </html>
@@ -21,7 +21,7 @@ def test_html_to_text_func():

    no_links_text = \
        "Some initial text\n\nWhich is across multiple " \
-        "lines\n\nMore Text So let's see what happens. Even More Text"
+        "lines\n\nMore Text\nSo let's see what happens.\nEven More Text"

    # check that no links are in the extracted text
    assert text_content == no_links_text
@@ -31,7 +31,7 @@ def test_html_to_text_func():

    links_text = \
        "Some initial text\n\nWhich is across multiple lines\n\n[ More Text " \
-        "](/first_link) So let's see what happens. [ Even More Text ]" \
+        "](/first_link)\nSo let's see what happens.\n[ Even More Text ]" \
        "(second_link.com)"

    # check that links are present in the extracted text
@@ -1,7 +1,5 @@
 #!/usr/bin/python3

-import time
-from flask import url_for
 from . util import live_server_setup
 from changedetectionio import html_tools

@@ -11,7 +9,7 @@ def test_setup(live_server):
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_regex_text_func():
-    from changedetectionio import fetch_site_status
+    from ..processors import text_json_diff as fetch_site_status

    test_content = """
    but sometimes we want to remove the lines.
@@ -11,7 +11,8 @@ def test_setup(live_server):
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_text_func():
-    from changedetectionio import fetch_site_status
+    from ..processors import text_json_diff as fetch_site_status
+

    test_content = """
    Some content
@@ -33,10 +34,10 @@ def test_strip_text_func():
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -49,10 +50,10 @@ def set_original_ignore_response():
 def set_modified_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <p>new ignore stuff</p>
     <p>blah</p>
     </body>
@@ -68,11 +69,11 @@ def set_modified_original_ignore_response():
 def set_modified_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
     <P>ZZZZz</P>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -12,10 +12,10 @@ def test_setup(live_server):
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <a href="/original_link"> Some More Text </a>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
@@ -29,10 +29,10 @@ def set_original_ignore_response():
 def set_modified_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <a href="/modified_link"> Some More Text </a>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
@@ -12,10 +12,10 @@ def test_setup(live_server):
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
@@ -27,10 +27,10 @@ def set_original_response():
 def set_some_changed_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines, and a new thing too.</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
@@ -12,15 +12,15 @@ def test_setup(live_server):
 def set_original_ignore_response_but_with_whitespace():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>


     Which is across multiple lines</p>
     <br>
-     </br>
+     <br>

-         So let's see what happens.  </br>
+         So let's see what happens.  <br>


     </body>
@@ -34,10 +34,10 @@ def set_original_ignore_response_but_with_whitespace():
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -198,8 +198,8 @@ def test_check_json_without_filter(client, live_server):
    )

    # Should still see '"html": "<b>"'
-    assert b'&#34;&lt;b&gt;' in res.data
-    assert res.data.count(b'{\n') >= 2
+    assert b'&#34;html&#34;: &#34;&lt;b&gt;&#34;' in res.data
+    assert res.data.count(b'{') >= 2

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
@@ -73,16 +73,12 @@ def test_check_notification(client, live_server):
    # We write the PNG to disk, but a JPEG should appear in the notification
    # Write the last screenshot png
    testimage_png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='
-    # This one is created when we save the screenshot from the webdriver/playwright session (converted from PNG)
-    testimage_jpg = '/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AKp//2Q=='


    uuid = extract_UUID_from_client(client)
    datastore = 'test-datastore'
    with open(os.path.join(datastore, str(uuid), 'last-screenshot.png'), 'wb') as f:
        f.write(base64.b64decode(testimage_png))
-    with open(os.path.join(datastore, str(uuid), 'last-screenshot.jpg'), 'wb') as f:
-        f.write(base64.b64decode(testimage_jpg))

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
@@ -100,6 +96,8 @@ def test_check_notification(client, live_server):
                                                   "Diff URL: {{diff_url}}\n"
                                                   "Snapshot: {{current_snapshot}}\n"
                                                   "Diff: {{diff}}\n"
+                                                   "Diff Added: {{diff_added}}\n"
+                                                   "Diff Removed: {{diff_removed}}\n"
                                                   "Diff Full: {{diff_full}}\n"
                                                   ":-)",
                              "notification_screenshot": True,
@@ -147,7 +145,7 @@ def test_check_notification(client, live_server):
    assert ':-)' in notification_submission
    assert "Diff Full: Some initial text" in notification_submission
    assert "Diff: (changed) Which is across multiple lines" in notification_submission
-    assert "(into   ) which has this one new line" in notification_submission
+    assert "(into) which has this one new line" in notification_submission
    # Re #342 - check for accidental python byte encoding of non-utf8/string
    assert "b'" not in notification_submission
    assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
@@ -160,12 +158,12 @@ def test_check_notification(client, live_server):

    # Check the attachment was added, and that it is a JPEG from the original PNG
    notification_submission_object = json.loads(notification_submission)
-    assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.jpg'
+    # We keep PNG screenshots for now
+    assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
    assert len(notification_submission_object['attachments'][0]['base64'])
-    assert notification_submission_object['attachments'][0]['mimetype'] == 'image/jpeg'
+    assert notification_submission_object['attachments'][0]['mimetype'] == 'image/png'
    jpeg_in_attachment = base64.b64decode(notification_submission_object['attachments'][0]['base64'])
-    assert b'JFIF' in jpeg_in_attachment
-    assert testimage_png not in notification_submission
+
    # Assert that the JPEG is readable (didn't get chewed up somewhere)
    from PIL import Image
    import io
@@ -297,7 +295,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server):
        follow_redirects=True
    )
    assert b'Settings updated' in res.data
-
+    client.get(
+        url_for("form_delete", uuid="all"),
+        follow_redirects=True
+    )
    # Add a watch and trigger a HTTP POST
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
@@ -8,10 +8,10 @@ from . util import live_server_setup
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -24,10 +24,10 @@ def set_original_ignore_response():
 def set_modified_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -40,12 +40,12 @@ def set_modified_original_ignore_response():
 def set_modified_with_trigger_text_response():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
+     <br>
     Add to cart
-     <br/>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -142,4 +142,4 @@ def test_trigger_functionality(client, live_server):
    res = client.get(url_for("preview_page", uuid="first"))

    # We should be able to see what we triggered on
-    assert b'<div class="triggered">Add to cart' in res.data
+    assert b'<div class="triggered">Add to cart' in res.data
@@ -8,10 +8,10 @@ from . util import live_server_setup
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -72,7 +72,7 @@ def test_trigger_regex_functionality(client, live_server):
    assert b'unviewed' not in res.data

    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write("regex test123<br/>\nsomething 123")
+        f.write("regex test123<br>\nsomething 123")

    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
@@ -81,4 +81,4 @@ def test_trigger_regex_functionality(client, live_server):

    # Cleanup everything
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    assert b'Deleted' in res.data
@@ -8,10 +8,10 @@ from . util import live_server_setup
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -94,7 +94,6 @@ def test_unique_lines_functionality(client, live_server):
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data

-
    # Now set the content which contains the new text and re-ordered existing text
    set_modified_with_trigger_text_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -12,10 +12,10 @@ def test_setup(live_server):
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>
     </body>
@@ -29,10 +29,10 @@ def set_original_response():
 def set_modified_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  THIS CHANGES AND SHOULDNT TRIGGER A CHANGE</br>
+     <br>
+     So let's see what happens.  THIS CHANGES AND SHOULDNT TRIGGER A CHANGE<br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some new text</div>
     </body>
@@ -13,18 +13,51 @@ class TestDiffBuilder(unittest.TestCase):

    def test_expected_diff_output(self):
        base_dir = os.path.dirname(__file__)
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
+        with open(base_dir + "/test-content/before.txt", 'r') as f:
+            previous_version_file_contents = f.read()
+
+        with open(base_dir + "/test-content/after.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+
+        output = diff.render_diff(previous_version_file_contents=previous_version_file_contents,
+                                  newest_version_file_contents=newest_version_file_contents)
+
        output = output.split("\n")
+
+
        self.assertIn('(changed) ok', output)
-        self.assertIn('(into   ) xok', output)
-        self.assertIn('(into   ) next-x-ok', output)
-        self.assertIn('(added  ) and something new', output)
+        self.assertIn('(into) xok', output)
+        self.assertIn('(into) next-x-ok', output)
+        self.assertIn('(added) and something new', output)

-
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
+        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents)
        output = output.split("\n")
        self.assertIn('(removed) for having learned computerese,', output)
        self.assertIn('(removed) I continue to examine bits, bytes and words', output)
+        
+        #diff_removed
+        with open(base_dir + "/test-content/before.txt", 'r') as f:
+            previous_version_file_contents = f.read()
+
+        with open(base_dir + "/test-content/after.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
+        output = output.split("\n")
+        self.assertIn('(changed) ok', output)
+        self.assertIn('(into) xok', output)
+        self.assertIn('(into) next-x-ok', output)
+        self.assertNotIn('(added) and something new', output)
+        
+        #diff_removed
+        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
+        output = output.split("\n")
+        self.assertIn('(removed) for having learned computerese,', output)
+        self.assertIn('(removed) I continue to examine bits, bytes and words', output)
+        

        # @todo test blocks of changed, blocks of added, blocks of removed

@@ -9,10 +9,10 @@ def set_original_response():
    test_return_data = """<html>
    <head><title>head title</title></head>
    <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <span class="foobar-detection" style='display:none'></span>
     </body>
     </html>
@@ -26,10 +26,10 @@ def set_modified_response():
    test_return_data = """<html>
    <head><title>modified head title</title></head>
    <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
@@ -43,11 +43,11 @@ def set_more_modified_response():
    test_return_data = """<html>
    <head><title>modified head title</title></head>
    <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
-     Ohh yeah awesome<br/>
+     <br>
+     So let's see what happens.  <br>
+     Ohh yeah awesome<br>
     </body>
     </html>
    """
@@ -3,9 +3,8 @@ import threading
 import queue
 import time

-from changedetectionio import content_fetcher
-from changedetectionio import queuedWatchMetaData
-from changedetectionio.fetch_site_status import FilterNotFoundInResponse
+from .processors.text_json_diff import FilterNotFoundInResponse
+from .fetchers import exceptions

 # A single update worker
 #
@@ -65,20 +64,32 @@ class update_worker(threading.Thread):
        if 'notification_urls' in n_object and n_object['notification_urls']:
            # HTML needs linebreak, but MarkDown and Text can use a linefeed
            if n_object['notification_format'] == 'HTML':
-                line_feed_sep = "</br>"
+                line_feed_sep = "<br>"
            else:
                line_feed_sep = "\n"

-            with open(watch_history[dates[-1]], 'rb') as f:
-                snapshot_contents = f.read()
+            # Add text that was triggered
+            snapshot_contents = watch.get_history_snapshot(dates[-1])
+            trigger_text = watch.get('trigger_text', [])
+            triggered_text = ''
+
+            if len(trigger_text):
+                from . import html_tools
+                triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
+                if triggered_text:
+                    triggered_text = line_feed_sep.join(triggered_text)
+

            n_object.update({
-                'watch_url': watch['url'],
+                'current_snapshot': snapshot_contents,
+                'diff': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), line_feed_sep=line_feed_sep),
+                'diff_added': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_removed=False, line_feed_sep=line_feed_sep),
+                'diff_full': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_equal=True, line_feed_sep=line_feed_sep),
+                'diff_removed': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_added=False, line_feed_sep=line_feed_sep),
+                'screenshot': watch.get_screenshot() if watch.get('notification_screenshot') else None,
+                'triggered_text': triggered_text,
                'uuid': watch_uuid,
-                'screenshot': watch.get_screenshot_as_jpeg() if watch.get('notification_screenshot') else None,
-                'current_snapshot': snapshot_contents.decode('utf-8'),
-                'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
-                'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
+                'watch_url': watch['url'],
            })
            logging.info (">> SENDING NOTIFICATION")
            self.notification_q.put(n_object)
@@ -151,9 +162,8 @@ class update_worker(threading.Thread):
                os.unlink(full_path)

    def run(self):
-        from changedetectionio import fetch_site_status

-        update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
+        from .processors import text_json_diff, restock_diff

        while not self.app.config.exit.is_set():

@@ -170,11 +180,22 @@ class update_worker(threading.Thread):
                    changed_detected = False
                    contents = b''
                    process_changedetection_results = True
-                    update_obj= {}
-                    print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
+                    update_obj = {}
+                    print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority,
+                                                                           self.datastore.data['watching'][uuid]['url']))
                    now = time.time()

                    try:
+                        processor = self.datastore.data['watching'][uuid].get('processor','text_json_diff')
+
+                        # @todo some way to switch by name
+                        update_handler = None
+                        if processor == 'restock_diff':
+                            update_handler = restock_diff.perform_site_check(datastore=self.datastore)
+                        else:
+                            # Used as a default and also by some tests
+                            update_handler = text_json_diff.perform_site_check(datastore=self.datastore)
+
                        changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -184,7 +205,7 @@ class update_worker(threading.Thread):
                    except PermissionError as e:
                        self.app.logger.error("File permission error updating", uuid, str(e))
                        process_changedetection_results = False
-                    except content_fetcher.ReplyWithContentButNoText as e:
+                    except exceptions.ReplyWithContentButNoText as e:
                        # Totally fine, it's by choice - just continue on, nothing more to care about
                        # Page had elements/content but no renderable text
                        # Backend (not filters) gave zero output
@@ -193,7 +214,7 @@ class update_worker(threading.Thread):
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
                        process_changedetection_results = False

-                    except content_fetcher.Non200ErrorCodeReceived as e:
+                    except exceptions.Non200ErrorCodeReceived as e:
                        if e.status_code == 403:
                            err_text = "Error - 403 (Access denied) received"
                        elif e.status_code == 404:
@@ -237,12 +258,12 @@ class update_worker(threading.Thread):

                        process_changedetection_results = False

-                    except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
+                    except exceptions.checksumFromPreviousCheckWasTheSame as e:
                        # Yes fine, so nothing todo, don't continue to process.
                        process_changedetection_results = False
                        changed_detected = False

-                    except content_fetcher.BrowserStepsStepTimout as e:
+                    except exceptions.BrowserStepsStepTimout as e:

                        if not self.datastore.data['watching'].get(uuid):
                            continue
@@ -267,25 +288,25 @@ class update_worker(threading.Thread):

                        process_changedetection_results = False

-                    except content_fetcher.EmptyReply as e:
+                    except exceptions.EmptyReply as e:
                        # Some kind of custom to-str handler in the exception handler that does this?
                        err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
-                    except content_fetcher.ScreenshotUnavailable as e:
+                    except exceptions.ScreenshotUnavailable as e:
                        err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
-                    except content_fetcher.JSActionExceptions as e:
+                    except exceptions.JSActionExceptions as e:
                        err_text = "Error running JS Actions - Page request - "+e.message
                        if e.screenshot:
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
-                    except content_fetcher.PageUnloadable as e:
+                    except exceptions.PageUnloadable as e:
                        err_text = "Page request from server didnt respond correctly"
                        if e.message:
                            err_text = "{} - {}".format(err_text, e.message)
@@ -312,6 +333,7 @@ class update_worker(threading.Thread):

                        self.cleanup_error_artifacts(uuid)

+                    #
                    # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                    if process_changedetection_results:
                        try:
@@ -68,5 +68,5 @@ pillow
 # playwright is installed at Dockerfile build time because it's not available on all platforms

 # Include pytest, so if there's a support issue we can ask them to run these tests on their setup
-pytest ~=6.2
+pytest ~=7.2
 pytest-flask ~=1.2
Author	SHA1	Message	Date
dgtlmoon	ec77b45e84	WIP	2023-04-08 21:14:03 +02:00
dgtlmoon	138f7fc59c	WIP	2023-04-08 20:35:13 +02:00
dgtlmoon	56b768d24f	WIP	2023-04-08 20:12:30 +02:00
dgtlmoon	a61d7b4284	Attempt to abstract out each fetch type (requests/playwright/webdriver etc)	2023-04-08 18:49:27 +02:00
dgtlmoon	9076ba6bd3	Tests - error test - be sure to clear results from other test parts	2023-04-06 16:12:18 +02:00
dgtlmoon	43af18e2bc	Update README.md	2023-04-06 15:26:06 +02:00
dgtlmoon	ad75e8cdd0	Tests - Add test to check that low level fetch errors are cleared on next check	2023-04-06 14:46:08 +02:00
dgtlmoon	f604643356	Restock alerts - adding extra detection texts	2023-04-06 13:51:33 +02:00
dgtlmoon	d5fd22f693	Restock monitor - Identify the cases where the product is also definitely in stock (#1489 )	2023-03-23 18:34:56 +01:00
dgtlmoon	1d9d11b3f5	Automated CI test for ensuring pypi package was built correctly (#1488 )	2023-03-23 12:20:18 +01:00
dgtlmoon	f49464f451	GitHub container build - 'provenance' was disabled	2023-03-22 10:40:49 +01:00
dgtlmoon	bc6bde4062	0.41.1	2023-03-21 23:16:01 +01:00
dgtlmoon	2863167f45	Fix for pip installations	2023-03-21 23:15:53 +01:00
dgtlmoon	ce3966c104	0.41	2023-03-21 20:30:21 +01:00
dgtlmoon	d5f574ca17	Notifications - Include triggered text token as `{{triggered_text}}` in notifications, so you can send just the content that matches. (#1485 )	2023-03-21 19:16:13 +01:00
dgtlmoon	c96ece170a	Notification tokens - add comment that the {{tokens}} can be used in the URLs also	2023-03-21 19:04:12 +01:00
dgtlmoon	1fb90bbddc	Quick add form - adjust font size and rename stock recheck	2023-03-20 20:19:32 +01:00
dgtlmoon	55b6ae86e8	Ability to set which text to process triggers on (added, removed, changed) according to the difference (#1483 )	2023-03-20 20:16:57 +01:00
dgtlmoon	66b892f770	Restock / stock / out of stock monitor - bumping detection texts	2023-03-20 15:01:52 +01:00
dgtlmoon	3b80bb2f0e	Use brotli for reducing the size of the text snapshots (#1482 )	2023-03-19 21:12:22 +01:00
dgtlmoon	e6d2d87b31	Notification screenshots - now PNG only for now to save disk space (no point creating two images) (#1481 )	2023-03-18 20:52:52 +01:00
dgtlmoon	6e71088cde	New feature - Restock / stock / out of stock monitor option/mode	2023-03-18 20:36:26 +01:00
dgtlmoon	2bc988dffc	UI - Clone/copy watch - A paused watch should not be checked when copied/cloned #1471 .	2023-03-17 23:58:15 +01:00
dgtlmoon	a578de36c5	Update README.md	2023-03-17 16:56:29 +01:00
dgtlmoon	4c74d39df0	Code - Abstract out the diff fetch types to make it easier to integrate new ones (#1467 )	2023-03-12 18:11:53 +01:00
dgtlmoon	c454cbb808	BrowserSteps - Adding `Goto URL` step	2023-03-12 17:22:56 +01:00
dgtlmoon	6f1eec0d5a	Fixing bad linebreak definition `</br>` in notifications and UI (#1465 )	2023-03-12 17:05:34 +01:00
reecespieces	0d05ee1586	Notification Improvements - New tokens `{{diff_added}}` and `{{diff_removed}}`, removed whitespace around `added` and `into` ( Issue #905 ) (#1454 )	2023-03-12 16:21:47 +01:00
dgtlmoon	23476f0e70	Update README.md	2023-03-01 23:13:35 +01:00