Compare commits


38 Commits

Author SHA1 Message Date
dgtlmoon 30515c0e9f More debug around queue size 2024-11-10 10:21:18 +01:00
dgtlmoon 4bda1a234f Update bug_report.md 2024-11-10 10:13:22 +01:00
dgtlmoon d297850539 Security - Fix test 2024-11-07 20:10:02 +01:00
dgtlmoon 751239250f Security check - improve test 2024-11-07 19:41:48 +01:00
dgtlmoon 6aceeb01ab 0.47.06 2024-11-07 18:47:18 +01:00
dgtlmoon 49bc982c69 CVE-2024-51998 - file:/ path traversal access should not be allowed to access a file without ALLOW_FILE_URI set 2024-11-07 18:45:19 +01:00
Arthur Nogueira Neves e0abf0b505 Update docker-compose.yml (#2767) 2024-11-06 18:41:55 +01:00
dgtlmoon f08a1185aa Price tracker - fix for sites that supply an empty additional price (#2758) 2024-11-01 10:56:27 +01:00
dgtlmoon ad5d7efbbf Testing - Pinning werkzeug (#2757) 2024-11-01 10:23:34 +01:00
dgtlmoon 7029d10f8b 0.47.05 2024-10-31 22:51:03 +01:00
dgtlmoon 26d3a23e05 CVE-2024-51483 - Fix for limiting access to file:// via source:file:///tmp/file.txt when using webdriver/playwright 2024-10-31 22:49:31 +01:00
dgtlmoon 942625e1fb Backups - Hide incomplete/running backups from being downloaded 2024-10-31 10:58:41 +01:00
dgtlmoon 33c83230a6 Backups - Backups now operate in the background, provide a nice UI to access/download previous backups (#2755) 2024-10-31 10:34:59 +01:00
dgtlmoon 87510becb5 Filters - Process all CSS and XPath 'subtract' selectors in a single pass to prevent index shifting and reference loss during DOM manipulation. (#2754) 2024-10-30 12:00:53 +01:00
dgtlmoon 5e95dc62a5 0.47.04 2024-10-29 08:25:05 +01:00
dgtlmoon 7d94535dbf Do not recheck 'paused' watches on edit/save (Re #2747 #2750) 2024-10-29 08:24:15 +01:00
dgtlmoon 563c196396 Notification post:// get:// etc - Fixing URL encoding of headers so that '+' in URL is correctly parsed as ' ' (and other url-encodings) (#2745) 2024-10-28 16:59:49 +01:00
Christopher Charbonneau Wells e8b82c47ca #2502 - Add jinja2 template handling to request body and headers (#2740) 2024-10-28 15:46:05 +01:00
Gonçalo Silva e84de7e8f4 Restock detection - Add additional out-of-stock detection for PT language (#2738) 2024-10-24 20:03:14 +02:00
dgtlmoon 1543edca24 "Send test notification" in "Restock" mode was not working correctly when restock tokens "{{restock.price}}" were in the notification body (#2737) 2024-10-24 19:46:45 +02:00
dgtlmoon 82e0b99b07 #2727 Notifications - Fix "send test notification" on empty list, includes test (#2731) 2024-10-21 11:35:37 +02:00
Emmanuel Ojighoro b0ff9d161e UI - Fix mobile styling inconsistencies and resolve diff page overflow issue (#2716) 2024-10-21 11:34:22 +02:00
dgtlmoon c1dd681643 Filters - "Block change detection when text exists" should not trigger a change when the original text returns 2024-10-14 12:57:02 +02:00
dgtlmoon ecafa27833 UI - More work on tab buttons hiding behind menu/header :-) 2024-10-11 22:54:09 +02:00
dgtlmoon f7d4e58613 0.47.03 2024-10-11 17:33:00 +02:00
dgtlmoon 5bb47e47db Remove same checksum skip check - saved a little CPU but added a lot of complexity (#2700) 2024-10-11 17:28:42 +02:00
dgtlmoon 03151da68e UI - Fix scroll offset / tab buttons hiding behind menu/header 2024-10-11 16:04:08 +02:00
dgtlmoon a16a70229d 0.47.01 2024-10-11 15:02:17 +02:00
dgtlmoon 9476c1076b Adding missing apprise_plugin for pypi/pip based installs 2024-10-11 15:01:27 +02:00
dgtlmoon a4959b5971 0.47.00 2024-10-11 13:04:56 +02:00
dgtlmoon a278fa22f2 Restock multiprice improvements (#2698) 2024-10-11 11:43:35 +02:00
dgtlmoon d39530b261 Test - Simple test for live preview 2024-10-11 11:07:12 +02:00
dgtlmoon d4b4355ff5 Adding test for proxy checker/scanner (#2697) 2024-10-11 09:52:55 +02:00
dgtlmoon c1c8de3104 Fixing proxy checker (#2696) 2024-10-11 00:19:19 +02:00
dgtlmoon 5a768d7db3 UTF-8 handling fixes, Improvements to whitespace filtering (#2691) 2024-10-10 14:59:39 +02:00
dgtlmoon f38429ec93 Testing - Tidyup (#2693) 2024-10-10 12:45:23 +02:00
dgtlmoon 783926962d Filters & Text - Preview refactor/improvements (#2689) 2024-10-09 09:17:32 +02:00
Marc 6cd1d50a4f Build - Add image source label to Dockerfile (Better Renovate and others support) (#2690) 2024-10-09 08:30:23 +02:00
58 changed files with 1122 additions and 366 deletions

View File

@@ -27,6 +27,10 @@ A clear and concise description of what the bug is.
**Version**
*Exact version* in the top right area: 0....
**How did you install?**
Docker, Pip, from source directly etc
**To Reproduce**
Steps to reproduce the behavior:

1
.gitignore vendored
View File

@@ -10,5 +10,6 @@ dist
venv
test-datastore/*
test-datastore
test-memory.log
*.egg-info*
.vscode/settings.json

View File

@@ -37,6 +37,7 @@ RUN pip install --target=/dependencies playwright~=1.41.2 \
# Final image stage
FROM python:${PYTHON_VERSION}-slim-bookworm
LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"
RUN apt-get update && apt-get install -y --no-install-recommends \
libxslt1.1 \

View File

@@ -1,4 +1,5 @@
recursive-include changedetectionio/api *
recursive-include changedetectionio/apprise_plugin *
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/content_fetchers *
recursive-include changedetectionio/model *

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.46.04'
__version__ = '0.47.06'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -58,7 +58,7 @@ class Watch(Resource):
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
if request.args.get('recheck'):
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return "OK", 200
if request.args.get('paused', '') == 'paused':
self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ class CreateWatch(Resource):
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
if new_uuid:
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ class CreateWatch(Resource):
if request.args.get('recheck_all'):
for uuid in self.datastore.data['watching'].keys():
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return {'status': "OK"}, 200
return list, 200
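
For context on the PrioritizedItem calls above: the queue orders purely on the numeric priority, with the payload dict excluded from comparison. A minimal sketch of how such an item behaves on a PriorityQueue, assuming the usual order=True dataclass approach (the real queuedWatchMetaData implementation may differ):

import queue
from dataclasses import dataclass, field
from typing import Any

@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: Any = field(compare=False)  # payload (e.g. {'uuid': ...}) is never compared

q = queue.PriorityQueue()
q.put(PrioritizedItem(priority=5, item={'uuid': 'clone-abc'}))
q.put(PrioritizedItem(priority=1, item={'uuid': 'recheck-xyz'}))
assert q.get().item['uuid'] == 'recheck-xyz'  # lower priority number is served first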

View File

@@ -13,6 +13,7 @@ from loguru import logger
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
import json
from urllib.parse import unquote_plus
from apprise.utils import parse_url as apprise_parse_url
from apprise import URLBase
@@ -47,7 +48,7 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
if results:
# Add our headers that the user can potentially over-ride if they wish
# to to our returned result set and tidy entries by unquoting them
headers = {URLBase.unquote(x): URLBase.unquote(y)
headers = {unquote_plus(x): unquote_plus(y)
for x, y in results['qsd+'].items()}
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
@@ -55,14 +56,14 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
# but here we are making straight requests, so we need todo convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[URLBase.unquote(k)] = URLBase.unquote(v)
params[unquote_plus(k)] = unquote_plus(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
auth = (unquote_plus(results.get('user')), unquote_plus(results.get('user')))
elif results.get('user'):
auth = (URLBase.unquote(results.get('user')))
auth = (unquote_plus(results.get('user')))
# Try to auto-guess if it's JSON
h = 'application/json; charset=utf-8'
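
The notification URL-encoding fix (563c196396) comes down to the difference between unquote and unquote_plus in the standard library: only the latter decodes '+' as a space. For illustration:

from urllib.parse import unquote, unquote_plus

raw = 'Authorization=Bearer+abc%20def'
key, value = raw.split('=', 1)
print(unquote(value))       # 'Bearer+abc def'  ('+' is preserved)
print(unquote_plus(value))  # 'Bearer abc def'  ('+' decoded as a space)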

View File

@@ -0,0 +1,164 @@
import datetime
import glob
import threading
from flask import Blueprint, render_template, send_from_directory, flash, url_for, redirect, abort
import os
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.flask_app import login_optionally_required
from loguru import logger
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
def create_backup(datastore_path, watches: dict):
logger.debug("Creating backup...")
import zipfile
from pathlib import Path
# create a ZipFile object
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
backupname = BACKUP_FILENAME_FORMAT.format(timestamp)
backup_filepath = os.path.join(datastore_path, backupname)
with zipfile.ZipFile(backup_filepath.replace('.zip', '.tmp'), "w",
compression=zipfile.ZIP_DEFLATED,
compresslevel=8) as zipObj:
# Add the index
zipObj.write(os.path.join(datastore_path, "url-watches.json"), arcname="url-watches.json")
# Add the flask app secret
zipObj.write(os.path.join(datastore_path, "secret.txt"), arcname="secret.txt")
# Add any data in the watch data directory.
for uuid, w in watches.items():
for f in Path(w.watch_data_dir).glob('*'):
zipObj.write(f,
# Use the full path to access the file, but make the file 'relative' in the Zip.
arcname=os.path.join(f.parts[-2], f.parts[-1]),
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8)
# Create a list file with just the URLs, so it's easier to port somewhere else in the future
list_file = "url-list.txt"
with open(os.path.join(datastore_path, list_file), "w") as f:
for uuid in watches:
url = watches[uuid]["url"]
f.write("{}\r\n".format(url))
list_with_tags_file = "url-list-with-tags.txt"
with open(
os.path.join(datastore_path, list_with_tags_file), "w"
) as f:
for uuid in watches:
url = watches[uuid].get('url')
tag = watches[uuid].get('tags', {})
f.write("{} {}\r\n".format(url, tag))
# Add it to the Zip
zipObj.write(
os.path.join(datastore_path, list_file),
arcname=list_file,
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8,
)
zipObj.write(
os.path.join(datastore_path, list_with_tags_file),
arcname=list_with_tags_file,
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8,
)
# Now it's done, rename it so it shows up finally and its completed being written.
os.rename(backup_filepath.replace('.zip', '.tmp'), backup_filepath.replace('.tmp', '.zip'))
def construct_blueprint(datastore: ChangeDetectionStore):
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
backup_threads = []
@login_optionally_required
@backups_blueprint.route("/request-backup", methods=['GET'])
def request_backup():
if any(thread.is_alive() for thread in backup_threads):
flash("A backup is already running, check back in a few minutes", "error")
return redirect(url_for('backups.index'))
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
flash("Maximum number of backups reached, please remove some", "error")
return redirect(url_for('backups.index'))
# Be sure we're written fresh
datastore.sync_to_json()
zip_thread = threading.Thread(target=create_backup, args=(datastore.datastore_path, datastore.data.get("watching")))
zip_thread.start()
backup_threads.append(zip_thread)
flash("Backup building in background, check back in a few minutes.")
return redirect(url_for('backups.index'))
def find_backups():
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
backups = glob.glob(backup_filepath)
backup_info = []
for backup in backups:
size = os.path.getsize(backup) / (1024 * 1024)
creation_time = os.path.getctime(backup)
backup_info.append({
'filename': os.path.basename(backup),
'filesize': f"{size:.2f}",
'creation_time': creation_time
})
backup_info.sort(key=lambda x: x['creation_time'], reverse=True)
return backup_info
@login_optionally_required
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
def download_backup(filename):
import re
filename = filename.strip()
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
abort(404)
if filename == 'latest':
backups = find_backups()
filename = backups[0]['filename']
if not re.match(r"^" + backup_filename_regex + "$", filename):
abort(400) # Bad Request if the filename doesn't match the pattern
logger.debug(f"Backup download request for '{full_path}'")
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
@login_optionally_required
@backups_blueprint.route("/", methods=['GET'])
def index():
backups = find_backups()
output = render_template("overview.html",
available_backups=backups,
backup_running=any(thread.is_alive() for thread in backup_threads)
)
return output
@login_optionally_required
@backups_blueprint.route("/remove-backups", methods=['GET'])
def remove_backups():
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
backups = glob.glob(backup_filepath)
for backup in backups:
os.unlink(backup)
flash("Backups were deleted.")
return redirect(url_for('backups.index'))
return backups_blueprint
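
The .tmp-then-rename step above is what keeps half-written archives invisible: find_backups() globs only the final .zip name, so a backup is listed in the UI only once it is completely written. The same idea in isolation, with hypothetical file names:

import os
import zipfile

tmp_path = "changedetection-backup-20241031.tmp"   # invisible to the *.zip glob
final_path = tmp_path.replace('.tmp', '.zip')

with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
    z.writestr("url-list.txt", "https://example.com\r\n")

# Atomic on the same filesystem: the .zip appears only when fully written
os.rename(tmp_path, final_path)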

View File

@@ -0,0 +1,36 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field %}
<div class="edit-form">
<div class="box-wrap inner">
<h4>Backups</h4>
{% if backup_running %}
<p>
<strong>A backup is running!</strong>
</p>
{% endif %}
<p>
Here you can download and request a new backup, when a backup is completed you will see it listed below.
</p>
<br>
{% if available_backups %}
<ul>
{% for backup in available_backups %}
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} Mb</li>
{% endfor %}
</ul>
{% else %}
<p>
<strong>No backups found.</strong>
</p>
{% endif %}
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">Create backup</a>
{% if available_backups %}
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">Remove backups</a>
{% endif %}
</div>
</div>
{% endblock %}

View File

@@ -1,4 +1,7 @@
import importlib
from concurrent.futures import ThreadPoolExecutor
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio.store import ChangeDetectionStore
from functools import wraps
@@ -30,7 +33,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
def long_task(uuid, preferred_proxy):
import time
from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
from changedetectionio.processors.text_json_diff import text_json_diff
from changedetectionio.safe_jinja import render as jinja_render
status = {'status': '', 'length': 0, 'text': ''}
@@ -38,8 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
contents = ''
now = time.time()
try:
update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
update_handler.call_browser()
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid
)
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
# title, size is len contents not len xfer
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 404:
@@ -48,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
else:
status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
except text_json_diff.FilterNotFoundInResponse:
except FilterNotFoundInResponse:
status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
except content_fetcher_exceptions.EmptyReply as e:
if e.status_code == 403 or e.status_code == 401:
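
The blueprint now resolves the processor by dotted module path at call time instead of importing it at module load. Reduced to its core, the pattern is:

import importlib

def load_processor(dotted_path="changedetectionio.processors.text_json_diff.processor"):
    # Resolved when the proxy check actually runs, so the module (and whatever
    # it imports) is only loaded on demand
    module = importlib.import_module(dotted_path)
    return module.perform_site_check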

View File

@@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
datastore.data['watching'][uuid].clear_watch()
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return redirect(url_for("index"))
@login_required

View File

@@ -17,7 +17,6 @@
</script>
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
<!--<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>-->
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
<div class="edit-form monospaced-textarea">

View File

@@ -75,6 +75,7 @@ class fetcher(Fetcher):
self.headers = r.headers
if not r.content or not len(r.content):
logger.debug(f"Requests returned empty content for '{url}'")
if not empty_pages_are_a_change:
raise EmptyReply(url=url, status_code=r.status_code)
else:

View File

@@ -30,6 +30,8 @@ function isItemInStock() {
'dieser artikel ist bald wieder verfügbar',
'dostępne wkrótce',
'en rupture de stock',
'esgotado',
'indisponível',
'isn\'t in stock right now',
'isnt in stock right now',
'isn’t in stock right now',
@@ -57,6 +59,7 @@ function isItemInStock() {
'notify me when available',
'notify me',
'notify when available',
'não disponível',
'não estamos a aceitar encomendas',
'out of stock',
'out-of-stock',

View File

@@ -53,6 +53,7 @@ extra_stylesheets = []
update_q = queue.PriorityQueue()
notification_q = queue.Queue()
MAX_QUEUE_SIZE = 2000
app = Flask(__name__,
static_url_path="",
@@ -67,7 +68,6 @@ FlaskCompress(app)
# Stop browser caching of assets
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
app.config.exit = Event()
app.config['NEW_VERSION_AVAILABLE'] = False
@@ -470,7 +470,7 @@ def changedetection_app(config=None, datastore_o=None):
continue
if watch.get('last_error'):
errored_count += 1
if search_q:
if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower():
sorted_watches.append(watch)
@@ -533,24 +533,32 @@ def changedetection_app(config=None, datastore_o=None):
@login_optionally_required
def ajax_callback_send_notification_test(watch_uuid=None):
# Watch_uuid could be unset in the case its used in tag editor, global setings
# Watch_uuid could be unset in the case it's used in tag editor, global settings
import apprise
import random
from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
# Use an existing random one on the global/main settings form
if not watch_uuid and (is_global_settings_form or is_group_settings_form):
if not watch_uuid and (is_global_settings_form or is_group_settings_form) \
and datastore.data.get('watching'):
logger.debug(f"Send test notification - Choosing random Watch {watch_uuid}")
watch_uuid = random.choice(list(datastore.data['watching'].keys()))
if not watch_uuid:
return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)
watch = datastore.data['watching'].get(watch_uuid)
notification_urls = request.form['notification_urls'].strip().splitlines()
notification_urls = None
if request.form.get('notification_urls'):
notification_urls = request.form['notification_urls'].strip().splitlines()
if not notification_urls:
logger.debug("Test notification - Trying by group/tag in the edit form if available")
@@ -568,12 +576,12 @@ def changedetection_app(config=None, datastore_o=None):
if not notification_urls:
return 'No Notification URLs set/found'
return 'Error: No Notification URLs set/found'
for n_url in notification_urls:
if len(n_url.strip()):
if not apobj.add(n_url):
return f'Error - {n_url} is not a valid AppRise URL.'
return f'Error: {n_url} is not a valid AppRise URL.'
try:
# use the same as when it is triggered, but then override it with the form test values
@@ -592,11 +600,13 @@ def changedetection_app(config=None, datastore_o=None):
if 'notification_body' in request.form and request.form['notification_body'].strip():
n_object['notification_body'] = request.form.get('notification_body', '').strip()
n_object.update(watch.extra_notification_token_values())
from . import update_worker
new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
new_worker.queue_notification_for_watch(notification_q=notification_q, n_object=n_object, watch=watch)
except Exception as e:
return make_response({'error': str(e)}, 400)
return make_response(f"Error: str(e)", 400)
return 'OK - Sent test notifications'
@@ -788,15 +798,15 @@ def changedetection_app(config=None, datastore_o=None):
# Recast it if need be to right data Watch handler
watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])
flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
# But in the case something is added we should save straight away
datastore.needs_write_urgent = True
# Queue the watch for immediate recheck, with a higher priority
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
if not datastore.data['watching'][uuid].get('paused'):
# Queue the watch for immediate recheck, with a higher priority
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff':
@@ -977,7 +987,7 @@ def changedetection_app(config=None, datastore_o=None):
importer = import_url_list()
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
for uuid in importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
if len(importer.remaining_data) == 0:
return redirect(url_for('index'))
@@ -990,7 +1000,7 @@ def changedetection_app(config=None, datastore_o=None):
d_importer = import_distill_io_json()
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
for uuid in d_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# XLSX importer
if request.files and request.files.get('xlsx_file'):
@@ -1014,7 +1024,7 @@ def changedetection_app(config=None, datastore_o=None):
w_importer.run(data=file, flash=flash, datastore=datastore)
for uuid in w_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# Could be some remaining, or we could be on GET
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@@ -1227,78 +1237,6 @@ def changedetection_app(config=None, datastore_o=None):
return output
# We're good but backups are even better!
@app.route("/backup", methods=['GET'])
@login_optionally_required
def get_backup():
import zipfile
from pathlib import Path
# Remove any existing backup file, for now we just keep one file
for previous_backup_filename in Path(datastore_o.datastore_path).rglob('changedetection-backup-*.zip'):
os.unlink(previous_backup_filename)
# create a ZipFile object
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
backupname = "changedetection-backup-{}.zip".format(timestamp)
backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
with zipfile.ZipFile(backup_filepath, "w",
compression=zipfile.ZIP_DEFLATED,
compresslevel=8) as zipObj:
# Be sure we're written fresh
datastore.sync_to_json()
# Add the index
zipObj.write(os.path.join(datastore_o.datastore_path, "url-watches.json"), arcname="url-watches.json")
# Add the flask app secret
zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")
# Add any data in the watch data directory.
for uuid, w in datastore.data['watching'].items():
for f in Path(w.watch_data_dir).glob('*'):
zipObj.write(f,
# Use the full path to access the file, but make the file 'relative' in the Zip.
arcname=os.path.join(f.parts[-2], f.parts[-1]),
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8)
# Create a list file with just the URLs, so it's easier to port somewhere else in the future
list_file = "url-list.txt"
with open(os.path.join(datastore_o.datastore_path, list_file), "w") as f:
for uuid in datastore.data["watching"]:
url = datastore.data["watching"][uuid]["url"]
f.write("{}\r\n".format(url))
list_with_tags_file = "url-list-with-tags.txt"
with open(
os.path.join(datastore_o.datastore_path, list_with_tags_file), "w"
) as f:
for uuid in datastore.data["watching"]:
url = datastore.data["watching"][uuid].get('url')
tag = datastore.data["watching"][uuid].get('tags', {})
f.write("{} {}\r\n".format(url, tag))
# Add it to the Zip
zipObj.write(
os.path.join(datastore_o.datastore_path, list_file),
arcname=list_file,
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8,
)
zipObj.write(
os.path.join(datastore_o.datastore_path, list_with_tags_file),
arcname=list_with_tags_file,
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8,
)
# Send_from_directory needs to be the full absolute path
return send_from_directory(os.path.abspath(datastore_o.datastore_path), backupname, as_attachment=True)
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
def static_content(group, filename):
from flask import make_response
@@ -1397,7 +1335,7 @@ def changedetection_app(config=None, datastore_o=None):
url = request.form.get('url').strip()
if datastore.url_exists(url):
flash(f'Warning, URL {url} already exists', "notice")
add_paused = request.form.get('edit_and_watch_submit_button') != None
processor = request.form.get('processor', 'text_json_diff')
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
@@ -1443,7 +1381,7 @@ def changedetection_app(config=None, datastore_o=None):
new_uuid = datastore.clone(uuid)
if new_uuid:
if not datastore.data['watching'].get(uuid).get('paused'):
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
flash('Cloned.')
return redirect(url_for('index'))
@@ -1464,7 +1402,7 @@ def changedetection_app(config=None, datastore_o=None):
if uuid:
if uuid not in running_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
i = 1
elif tag:
@@ -1475,7 +1413,7 @@ def changedetection_app(config=None, datastore_o=None):
continue
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
update_q.put(
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
)
i += 1
@@ -1485,9 +1423,8 @@ def changedetection_app(config=None, datastore_o=None):
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
if with_errors and not watch.get('last_error'):
continue
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
i += 1
flash(f"{i} watches queued for rechecking.")
return redirect(url_for('index', tag=tag))
@@ -1544,7 +1481,7 @@ def changedetection_app(config=None, datastore_o=None):
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
# Recheck and require a full reprocessing
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
flash("{} watches queued for rechecking".format(len(uuids)))
elif (op == 'clear-errors'):
@@ -1679,6 +1616,9 @@ def changedetection_app(config=None, datastore_o=None):
import changedetectionio.blueprint.check_proxies as check_proxies
app.register_blueprint(check_proxies.construct_blueprint(datastore=datastore), url_prefix='/check_proxy')
import changedetectionio.blueprint.backups as backups
app.register_blueprint(backups.construct_blueprint(datastore), url_prefix='/backups')
# @todo handle ctrl break
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
@@ -1803,12 +1743,14 @@ def ticker_thread_check_time_launch_checks():
except RuntimeError as e:
# RuntimeError: dictionary changed size during iteration
time.sleep(0.1)
watch_uuid_list = []
else:
break
# Re #438 - Don't place more watches in the queue to be checked if the queue is already large
while update_q.qsize() >= 2000:
time.sleep(1)
logger.warning(f"Recheck watches queue size limit reached ({MAX_QUEUE_SIZE}), skipping adding more items")
time.sleep(3)
recheck_time_system_seconds = int(datastore.threshold_seconds)
@@ -1868,7 +1810,7 @@ def ticker_thread_check_time_launch_checks():
f"{now - watch['last_checked']:0.2f}s since last checked")
# Into the queue with you
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
# Reset for next time
watch.jitter_seconds = 0
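
With MAX_QUEUE_SIZE now a named constant, the ticker loop warns and backs off instead of silently sleeping while the queue is saturated. A sketch of the guard, assuming a plain PriorityQueue:

import queue
import time
from loguru import logger

MAX_QUEUE_SIZE = 2000
update_q = queue.PriorityQueue()

def wait_for_queue_headroom():
    # Re #438 - don't place more watches in the queue while workers are behind
    while update_q.qsize() >= MAX_QUEUE_SIZE:
        logger.warning(f"Recheck watches queue size limit reached ({MAX_QUEUE_SIZE}), skipping adding more items")
        time.sleep(3)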

View File

@@ -476,7 +476,7 @@ class processor_text_json_diff_form(commonSettingsForm):
title = StringField('Title', default='')
ignore_text = StringListField('Remove lines containing', [ValidateListRegex()])
ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()])
headers = StringDictKeyValue('Request headers')
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
@@ -496,7 +496,7 @@ class processor_text_json_diff_form(commonSettingsForm):
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
proxy = RadioField('Proxy')
filter_failure_notification_send = BooleanField(
@@ -515,6 +515,7 @@ class processor_text_json_diff_form(commonSettingsForm):
if not super().validate():
return False
from changedetectionio.safe_jinja import render as jinja_render
result = True
# Fail form validation when a body is set for a GET
@@ -524,18 +525,46 @@ class processor_text_json_diff_form(commonSettingsForm):
# Attempt to validate jinja2 templates in the URL
try:
from changedetectionio.safe_jinja import render as jinja_render
jinja_render(template_str=self.url.data)
except ModuleNotFoundError as e:
# incase jinja2_time or others is missing
logger.error(e)
self.url.errors.append(e)
self.url.errors.append(f'Invalid template syntax configuration: {e}')
result = False
except Exception as e:
logger.error(e)
self.url.errors.append('Invalid template syntax')
self.url.errors.append(f'Invalid template syntax: {e}')
result = False
# Attempt to validate jinja2 templates in the body
if self.body.data and self.body.data.strip():
try:
jinja_render(template_str=self.body.data)
except ModuleNotFoundError as e:
# incase jinja2_time or others is missing
logger.error(e)
self.body.errors.append(f'Invalid template syntax configuration: {e}')
result = False
except Exception as e:
logger.error(e)
self.body.errors.append(f'Invalid template syntax: {e}')
result = False
# Attempt to validate jinja2 templates in the headers
if len(self.headers.data) > 0:
try:
for header, value in self.headers.data.items():
jinja_render(template_str=value)
except ModuleNotFoundError as e:
# incase jinja2_time or others is missing
logger.error(e)
self.headers.errors.append(f'Invalid template syntax configuration: {e}')
result = False
except Exception as e:
logger.error(e)
self.headers.errors.append(f'Invalid template syntax in "{header}" header: {e}')
result = False
return result
class SingleExtraProxy(Form):
@@ -616,7 +645,7 @@ class globalSettingsForm(Form):
requests = FormField(globalSettingsRequestForm)
application = FormField(globalSettingsApplicationForm)
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
class extractDataForm(Form):
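
Validating the URL, body, and each header value through jinja_render at form time means a broken template is rejected on save rather than failing later during a check. Assuming safe_jinja wraps a standard Jinja2 environment, the validation reduces to:

from jinja2 import Environment
from jinja2.exceptions import TemplateSyntaxError

def template_is_valid(template_str: str) -> tuple[bool, str]:
    try:
        # from_string() compiles immediately, so syntax errors surface here
        Environment().from_string(template_str).render()
        return True, ''
    except TemplateSyntaxError as e:
        return False, f'Invalid template syntax: {e}'

ok, err = template_is_valid('{{ 1 + }}')  # -> (False, 'Invalid template syntax: ...')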

View File

@@ -3,11 +3,11 @@ from lxml import etree
import json
import re
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
# 'price' , 'lowPrice', 'highPrice' are usually under here
# All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -54,29 +54,64 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
def subtractive_css_selector(css_selector, html_content):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
for item in soup.select(css_selector):
# So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM
elements_to_remove = soup.select(css_selector)
# Then, remove them in a separate loop
for item in elements_to_remove:
item.decompose()
return str(soup)
def subtractive_xpath_selector(xpath_selector, html_content):
def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
# Parse the HTML content using lxml
html_tree = etree.HTML(html_content)
elements_to_remove = html_tree.xpath(xpath_selector)
# First, collect all elements to remove
elements_to_remove = []
# Iterate over the list of XPath selectors
for selector in selectors:
# Collect elements for each selector
elements_to_remove.extend(html_tree.xpath(selector))
# Then, remove them in a separate loop
for element in elements_to_remove:
element.getparent().remove(element)
if element.getparent() is not None: # Ensure the element has a parent before removing
element.getparent().remove(element)
# Convert the modified HTML tree back to a string
modified_html = etree.tostring(html_tree, method="html").decode("utf-8")
return modified_html
def element_removal(selectors: List[str], html_content):
"""Removes elements that match a list of CSS or xPath selectors."""
"""Removes elements that match a list of CSS or XPath selectors."""
modified_html = html_content
css_selectors = []
xpath_selectors = []
for selector in selectors:
if selector.startswith(('xpath:', 'xpath1:', '//')):
# Handle XPath selectors separately
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
modified_html = subtractive_xpath_selector(xpath_selector, modified_html)
xpath_selectors.append(xpath_selector)
else:
modified_html = subtractive_css_selector(selector, modified_html)
# Collect CSS selectors as one "hit", see comment in subtractive_css_selector
css_selectors.append(selector.strip().strip(","))
if xpath_selectors:
modified_html = subtractive_xpath_selector(xpath_selectors, modified_html)
if css_selectors:
# Remove duplicates, then combine all CSS selectors into one string, separated by commas
# This stops the elements index shifting
unique_selectors = list(set(css_selectors)) # Ensure uniqueness
combined_css_selector = " , ".join(unique_selectors)
modified_html = subtractive_css_selector(combined_css_selector, modified_html)
return modified_html
def elementpath_tostring(obj):
@@ -326,6 +361,7 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
# - "line numbers" return a list of line numbers that match (int list)
#
# wordlist - list of regex's (str) or words (str)
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
def strip_ignore_text(content, wordlist, mode="content"):
i = 0
output = []
@@ -341,32 +377,30 @@ def strip_ignore_text(content, wordlist, mode="content"):
else:
ignore_text.append(k.strip())
for line in content.splitlines():
for line in content.splitlines(keepends=True):
i += 1
# Always ignore blank lines in this mode. (when this function gets called)
got_match = False
if len(line.strip()):
for l in ignore_text:
if l.lower() in line.lower():
for l in ignore_text:
if l.lower() in line.lower():
got_match = True
if not got_match:
for r in ignore_regex:
if r.search(line):
got_match = True
if not got_match:
for r in ignore_regex:
if r.search(line):
got_match = True
if not got_match:
# Not ignored
output.append(line.encode('utf8'))
else:
ignored_line_numbers.append(i)
if not got_match:
# Not ignored, and should preserve "keepends"
output.append(line)
else:
ignored_line_numbers.append(i)
# Used for finding out what to highlight
if mode == "line numbers":
return ignored_line_numbers
return "\n".encode('utf8').join(output)
return ''.join(output)
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
from xml.sax.saxutils import escape as xml_escape
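
The single-pass 'subtract' change (87510becb5) works because BeautifulSoup evaluates one combined, comma-joined selector against the intact tree before any decompose() call, so removals can no longer shift the indexes that later selectors would have matched against. On a toy document:

from bs4 import BeautifulSoup

html = "<div><p id='a'>one</p><p id='b'>two</p><span>keep</span></div>"
soup = BeautifulSoup(html, "html.parser")

# One pass: resolve every match first, then decompose - indexes can't shift mid-walk
for el in soup.select("#a , #b"):
    el.decompose()

print(soup)  # <div><span>keep</span></div>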

View File

@@ -6,6 +6,8 @@ import re
from pathlib import Path
from loguru import logger
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
@@ -87,6 +89,10 @@ class model(watch_base):
if ready_url.startswith('source:'):
ready_url=ready_url.replace('source:', '')
# Also double check it after any Jinja2 formatting just incase
if not is_safe_url(ready_url):
return 'DISABLED'
return ready_url
def clear_watch(self):
@@ -312,13 +318,13 @@ class model(watch_base):
dest = os.path.join(self.watch_data_dir, snapshot_fname)
if not os.path.exists(dest):
with open(dest, 'wb') as f:
f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
else:
snapshot_fname = f"{snapshot_id}.txt"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
if not os.path.exists(dest):
with open(dest, 'wb') as f:
f.write(contents)
f.write(contents.encode('utf-8'))
# Append to index
# @todo check last char was \n
@@ -350,14 +356,32 @@ class model(watch_base):
return seconds
# Iterate over all history texts and see if something new exists
def lines_contain_something_unique_compared_to_history(self, lines: list):
local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
# Always applying .strip() to start/end but optionally replace any other whitespace
def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
local_lines = []
if lines:
if ignore_whitespace:
if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
local_lines = set([l.translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
else:
local_lines = set([l.decode('utf-8').translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
else:
if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
local_lines = set([l.strip().lower() for l in lines])
else:
local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
# Compare each lines (set) against each history text file (set) looking for something new..
existing_history = set({})
for k, v in self.history.items():
content = self.get_history_snapshot(k)
alist = set([line.strip().lower() for line in content.splitlines()])
if ignore_whitespace:
alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
else:
alist = set([line.strip().lower() for line in content.splitlines()])
existing_history = existing_history.union(alist)
# Check that everything in local_lines(new stuff) already exists in existing_history - it should
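
TRANSLATE_WHITESPACE_TABLE, built once with str.maketrans, maps every character of '\r\n\t ' to None, so one translate() call strips them all; the optional ignore_whitespace comparison above relies on exactly that:

TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')

a = "Price:\t159  EUR \r\n"
b = "PRICE:159EUR"
# Both normalize to 'price:159eur', so they count as the same line
assert a.translate(TRANSLATE_WHITESPACE_TABLE).lower() == b.lower()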

View File

@@ -18,6 +18,7 @@ class difference_detection_processor():
screenshot = None
watch = None
xpath_data = None
preferred_proxy = None
def __init__(self, *args, datastore, watch_uuid, **kwargs):
super().__init__(*args, **kwargs)
@@ -26,23 +27,24 @@ class difference_detection_processor():
# Generic fetcher that should be extended (requests, playwright etc)
self.fetcher = Fetcher()
def call_browser(self):
def call_browser(self, preferred_proxy_id=None):
from requests.structures import CaseInsensitiveDict
# Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
url = self.watch.link
# Protect against file://, file:/ access, check the real "link" without any meta "source:" etc prepended.
if re.search(r'^file:/', url.strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
raise Exception(
"file:// type access is denied for security reasons."
)
url = self.watch.link
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
# Proxy ID "key"
preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
# Pluggable content self.fetcher
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
@@ -100,6 +102,7 @@ class difference_detection_processor():
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
# Tweak the base config with the per-watch ones
from changedetectionio.safe_jinja import render as jinja_render
request_headers = CaseInsensitiveDict()
ua = self.datastore.data['settings']['requests'].get('default_ua')
@@ -116,9 +119,15 @@ class difference_detection_processor():
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
for header_name in request_headers:
request_headers.update({header_name: jinja_render(template_str=request_headers.get(header_name))})
timeout = self.datastore.data['settings']['requests'].get('timeout')
request_body = self.watch.get('body')
if request_body:
request_body = jinja_render(template_str=self.watch.get('body'))
request_method = self.watch.get('method')
ignore_status_codes = self.watch.get('ignore_status_codes', False)
@@ -155,7 +164,7 @@ class difference_detection_processor():
# After init, call run_changedetection() which will do the actual change-detection
@abstractmethod
def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
def run_changedetection(self, watch):
update_obj = {'last_notification_error': False, 'last_error': False}
some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
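
The broadened guard now rejects anything starting with file:/ (a single slash suffices), covering both the source:file:///... form (CVE-2024-51483) and single-slash file:/ URLs (CVE-2024-51998), unless ALLOW_FILE_URI is set. The regex behaviour directly:

import re

for url in ('file:///etc/passwd', 'file:/etc/passwd', 'FILE://x', 'https://example.com'):
    blocked = bool(re.search(r'^file:/', url.strip(), re.IGNORECASE))
    print(url, '->', 'DENIED' if blocked else 'ok')
# Only the https URL passes unless ALLOW_FILE_URI is set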

View File

@@ -27,22 +27,27 @@ def _search_prop_by_value(matches, value):
return prop[1] # Yield the desired value and exit the function
def _deduplicate_prices(data):
seen = set()
unique_data = []
import re
'''
Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159"
Get all the values, clean it and add it to a set then return the unique values
'''
unique_data = set()
# Return the complete 'datum' where its price was not seen before
for datum in data:
# Convert 'value' to float if it can be a numeric string, otherwise leave it as is
try:
normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
except ValueError:
normalized_value = datum.value
# If the normalized value hasn't been seen yet, add it to unique data
if normalized_value not in seen:
unique_data.append(datum)
seen.add(normalized_value)
return unique_data
if isinstance(datum.value, list):
# Process each item in the list
normalized_value = set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value if str(item).strip()])
unique_data.update(normalized_value)
else:
# Process single value
v = float(re.sub(r'[^\d.]', '', str(datum.value)))
unique_data.add(v)
return list(unique_data)
# should return Restock()
@@ -83,14 +88,13 @@ def get_itemprop_availability(html_content) -> Restock:
if price_result:
# Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and
# parse that for the UI?
prices_found = set(str(item.value).replace('$', '') for item in price_result)
if len(price_result) > 1 and len(prices_found) > 1:
if len(price_result) > 1 and len(price_result) > 1:
# See of all prices are different, in the case that one product has many embedded data types with the same price
# One might have $121.95 and another 121.95 etc
logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.")
raise MoreThanOnePriceFound()
value['price'] = price_result[0].value
value['price'] = price_result[0]
pricecurrency_result = pricecurrency_parse.find(data)
if pricecurrency_result:
@@ -140,7 +144,7 @@ class perform_site_check(difference_detection_processor):
screenshot = None
xpath_data = None
def run_changedetection(self, watch, skip_when_checksum_same=True):
def run_changedetection(self, watch):
import hashlib
if not watch:
@@ -220,7 +224,7 @@ class perform_site_check(difference_detection_processor):
itemprop_availability['original_price'] = itemprop_availability.get('price')
update_obj['restock']["original_price"] = itemprop_availability.get('price')
if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
raise ProcessorException(
message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
url=watch.get('url'),
@@ -307,4 +311,4 @@ class perform_site_check(difference_detection_processor):
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
return changed_detected, update_obj, snapshot_content.strip()
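
_deduplicate_prices now reduces every representation of the same price ('$159', '159', 159, '$ 159') to one float before set-based deduplication, so a page embedding the price in several formats no longer trips MoreThanOnePriceFound. The normalization step on its own:

import re

def normalize_price(value) -> float:
    # Strip currency symbols, spaces and anything else non-numeric
    return float(re.sub(r'[^\d.]', '', str(value)))

raw = ['$159', '159', 159, '$ 159']
print({normalize_price(v) for v in raw})  # {159.0} - a single unique price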

View File

@@ -11,10 +11,7 @@ def _task(watch, update_handler):
try:
# The slow process (we run 2 of these in parallel)
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
watch=watch,
skip_when_checksum_same=False,
)
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
except FilterNotFoundInResponse as e:
text_after_filter = f"Filter not found in HTML: {str(e)}"
except ReplyWithContentButNoText as e:
@@ -46,6 +43,9 @@ def prepare_filter_prevew(datastore, watch_uuid):
text_after_filter = ''
text_before_filter = ''
trigger_line_numbers = []
ignore_line_numbers = []
tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
@@ -72,7 +72,7 @@ def prepare_filter_prevew(datastore, watch_uuid):
)
# Use the last loaded HTML as the input
update_handler.datastore = datastore
update_handler.fetcher.content = decompressed_data
update_handler.fetcher.content = str(decompressed_data) # str() because playwright/puppeteer/requests return string
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
# Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
@@ -84,9 +84,7 @@ def prepare_filter_prevew(datastore, watch_uuid):
text_after_filter = future1.result()
text_before_filter = future2.result()
trigger_line_numbers = []
try:
trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
wordlist=tmp_watch['trigger_text'],
mode='line numbers'
@@ -94,6 +92,15 @@ def prepare_filter_prevew(datastore, watch_uuid):
except Exception as e:
text_before_filter = f"Error: {str(e)}"
try:
text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', [])
ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
wordlist=text_to_ignore,
mode='line numbers'
)
except Exception as e:
text_before_filter = f"Error: {str(e)}"
logger.trace(f"Parsed in {time.time() - now:.3f}s")
return jsonify(
@@ -102,6 +109,7 @@ def prepare_filter_prevew(datastore, watch_uuid):
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
'duration': time.time() - now,
'trigger_line_numbers': trigger_line_numbers,
'ignore_line_numbers': ignore_line_numbers,
}
)
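
The preview runs the same snapshot through two passes in parallel, one with the watch's filters applied and one without. A sketch of the two-future pattern with stand-in task functions (the real _task runs the full filter pipeline):

from concurrent.futures import ThreadPoolExecutor

def run_with_filters(html: str) -> str:      # stand-in for _task(watch, update_handler)
    return html.upper()

def run_without_filters(html: str) -> str:   # stand-in for the blank-watch pass
    return html

with ThreadPoolExecutor(max_workers=2) as pool:
    future1 = pool.submit(run_with_filters, "<p>hello</p>")
    future2 = pool.submit(run_without_filters, "<p>hello</p>")
    text_after_filter = future1.result()
    text_before_filter = future2.result()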

View File

@@ -7,7 +7,7 @@ import re
import urllib3
from changedetectionio.processors import difference_detection_processor
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
from changedetectionio import html_tools, content_fetchers
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from loguru import logger
@@ -35,8 +35,7 @@ class PDFToHTMLToolNotFound(ValueError):
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch, skip_when_checksum_same=True):
def run_changedetection(self, watch):
changed_detected = False
html_content = ""
screenshot = False # as bytes
@@ -59,9 +58,6 @@ class perform_site_check(difference_detection_processor):
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
if skip_when_checksum_same:
if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()
# Fetching complete, now filters
@@ -205,22 +201,14 @@ class perform_site_check(difference_detection_processor):
if watch.get('trim_text_whitespace'):
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
if watch.get('remove_duplicate_lines'):
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
if watch.get('sort_text_alphabetically'):
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
# Re #340 - return the content before the 'ignore text' was applied
# Also used to calculate/show what was removed
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
text_content_before_ignored_filter = stripped_text_from_html
# @todo whitespace coming from missing rtrim()?
# stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
# Rewrite's the processing text based on only what diff result they want to see
if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
# Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
from changedetectionio import diff
@@ -235,12 +223,12 @@ class perform_site_check(difference_detection_processor):
line_feed_sep="\n",
include_change_type_prefix=False)
watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)
watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))
if not rendered_diff and stripped_text_from_html:
# We had some content, but no differences were found
# Store our new file as the MD5 so it will trigger in the future
c = hashlib.md5(stripped_text_from_html.encode('utf-8').translate(None, b'\r\n\t ')).hexdigest()
c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
else:
stripped_text_from_html = rendered_diff
@@ -261,14 +249,6 @@ class perform_site_check(difference_detection_processor):
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
# If there's text to skip
# @todo we could abstract out the get_text() to handle this cleaner
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
if len(text_to_ignore):
stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
else:
stripped_text_from_html = stripped_text_from_html.encode('utf8')
# 615 Extract text by regex
extract_text = watch.get('extract_text', [])
if len(extract_text) > 0:
@@ -277,39 +257,53 @@ class perform_site_check(difference_detection_processor):
# incase they specified something in '/.../x'
if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
result = re.findall(regex, stripped_text_from_html)
for l in result:
if type(l) is tuple:
# @todo - some formatter option default (between groups)
regex_matched_output += list(l) + [b'\n']
regex_matched_output += list(l) + ['\n']
else:
# @todo - some formatter option default (between each ungrouped result)
regex_matched_output += [l] + [b'\n']
regex_matched_output += [l] + ['\n']
else:
# Doesnt look like regex, just hunt for plaintext and return that which matches
# `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE)
r = re.compile(re.escape(s_re), re.IGNORECASE)
res = r.findall(stripped_text_from_html)
if res:
for match in res:
regex_matched_output += [match] + [b'\n']
regex_matched_output += [match] + ['\n']
##########################################################
stripped_text_from_html = b''
text_content_before_ignored_filter = b''
stripped_text_from_html = ''
if regex_matched_output:
# @todo some formatter for presentation?
stripped_text_from_html = b''.join(regex_matched_output)
text_content_before_ignored_filter = stripped_text_from_html
stripped_text_from_html = ''.join(regex_matched_output)
if watch.get('remove_duplicate_lines'):
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
if watch.get('sort_text_alphabetically'):
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
### CALCULATE MD5
# If there's text to ignore
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
text_for_checksuming = stripped_text_from_html
if text_to_ignore:
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
else:
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()
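The checksum hunks above all lean on TRANSLATE_WHITESPACE_TABLE imported from changedetectionio/html_tools.py. A minimal reconstruction, assuming the table strips the same characters the old bytes-based translate(None, b'\r\n\t ') removed:

import hashlib

# Assumed equivalent of html_tools.TRANSLATE_WHITESPACE_TABLE
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')

text_for_checksuming = "Some\r\n\ttext with spaces"
fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
assert fetched_md5 == hashlib.md5(b'Sometextwithspaces').hexdigest()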
############ Blocking rules, after checksum #################
blocked = False
@@ -337,19 +331,33 @@ class perform_site_check(difference_detection_processor):
if result:
blocked = True
# The main thing that all this at the moment comes down to :)
if watch.get('previous_md5') != fetched_md5:
changed_detected = True
# Looks like something changed, but did it match all the rules?
if blocked:
changed_detected = False
else:
# The main thing that all this at the moment comes down to :)
if watch.get('previous_md5') != fetched_md5:
changed_detected = True
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
# On the first run of a site, watch['previous_md5'] will be None, set it to the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
if changed_detected:
if watch.get('check_unique_lines', False):
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
lines=stripped_text_from_html.splitlines(),
ignore_whitespace=ignore_whitespace
)
# One or more lines? unsure?
if not has_unique_lines:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
@@ -357,12 +365,6 @@ class perform_site_check(difference_detection_processor):
else:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
# On the first run of a site, watch['previous_md5'] will be None, set it to the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
# stripped_text_from_html - Everything after filters and NO 'ignored' content
return changed_detected, update_obj, stripped_text_from_html
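A rough sketch of the ignore_whitespace-aware unique-line check that run_changedetection now delegates to the Watch model; the standalone helper below is an assumption, not the real Watch.lines_contain_something_unique_compared_to_history:

TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')

def lines_contain_something_unique(lines, history_lines, ignore_whitespace=False):
    def norm(line):
        return line.translate(TRANSLATE_WHITESPACE_TABLE) if ignore_whitespace else line
    seen = {norm(l) for l in history_lines}
    return any(norm(l) not in seen for l in lines)

assert not lines_contain_something_unique(["hello  world"], ["hello world"], ignore_whitespace=True)
assert lines_contain_something_unique(["a brand new line"], ["hello world"])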

View File

@@ -28,17 +28,14 @@ $(document).ready(function() {
url: notification_base_url,
data : data,
statusCode: {
400: function() {
// More than likely the CSRF token was lost when the server restarted
alert("There was a problem processing the request, please reload the page.");
400: function(data) {
// More than likely the CSRF token was lost when the server restarted
alert(data.responseText);
}
}
}).done(function(data){
console.log(data);
alert(data);
}).fail(function(data){
console.log(data);
alert('There was an error communicating with the server.');
})
});
});

View File

@@ -1,14 +1,14 @@
$(function () {
/* add container before each proxy location to show status */
var option_li = $('.fetch-backend-proxy li').filter(function() {
return $("input",this)[0].value.length >0;
});
//var option_li = $('.fetch-backend-proxy li');
var isActive = false;
$(option_li).prepend('<div class="proxy-status"></div>');
$(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
function setup_html_widget() {
var option_li = $('.fetch-backend-proxy li').filter(function () {
return $("input", this)[0].value.length > 0;
});
$(option_li).prepend('<div class="proxy-status"></div>');
$(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
}
function set_proxy_check_status(proxy_key, state) {
// select input by value name
@@ -59,8 +59,14 @@ $(function () {
}
$('#check-all-proxies').click(function (e) {
e.preventDefault()
$('body').addClass('proxy-check-active');
if (!$('body').hasClass('proxy-check-active')) {
setup_html_widget();
$('body').addClass('proxy-check-active');
}
$('.proxy-check-details').html('');
$('.proxy-status').html('<span class="spinner"></span>').fadeIn();
$('.proxy-timing').html('');

View File

@@ -26,8 +26,7 @@ function set_active_tab() {
if (tab.length) {
tab[0].parentElement.className = "active";
}
// hash could move the page down
window.scrollTo(0, 0);
}
function focus_error_tab() {

View File

@@ -42,8 +42,12 @@ function request_textpreview_update() {
{
'color': '#ee0000',
'lines': data['trigger_line_numbers']
},
{
'color': '#757575',
'lines': data['ignore_line_numbers']
}
]);
])
}).fail(function (error) {
if (error.statusText === 'abort') {
console.log('Request was aborted due to a new request being fired.');
@@ -76,8 +80,8 @@ $(document).ready(function () {
$('body').toggleClass('preview-text-enabled')
request_textpreview_update();
const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
$('textarea:visible')[method]('keyup blur', request_textpreview_update.throttle(1000));
$('input:visible')[method]('keyup blur change', request_textpreview_update.throttle(1000));
$('#filters-and-triggers textarea')[method]('blur', request_textpreview_update.throttle(1000));
$('#filters-and-triggers input')[method]('change', request_textpreview_update.throttle(1000));
$("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000));
});
$('.minitabs-wrapper').miniTabs({

View File

@@ -153,7 +153,8 @@ html[data-darkmode="true"] {
border: 1px solid transparent;
vertical-align: top;
font: 1em monospace;
text-align: left; }
text-align: left;
overflow: clip; }
#diff-ui pre {
white-space: pre-wrap; }
@@ -172,7 +173,9 @@ ins {
text-decoration: none; }
#result {
white-space: pre-wrap; }
white-space: pre-wrap;
word-break: break-word;
overflow-wrap: break-word; }
#settings {
background: rgba(0, 0, 0, 0.05);
@@ -231,3 +234,12 @@ td#diff-col div {
border-radius: 5px;
background: var(--color-background);
box-shadow: 1px 1px 4px var(--color-shadow-jump); }
.pure-form button.reset-margin {
margin: 0px; }
.diff-fieldset {
display: flex;
align-items: center;
gap: 4px;
flex-wrap: wrap; }

View File

@@ -24,6 +24,7 @@
vertical-align: top;
font: 1em monospace;
text-align: left;
overflow: clip; // clip overflowing contents to cell boundaries
}
pre {
@@ -50,6 +51,8 @@ ins {
#result {
white-space: pre-wrap;
word-break: break-word;
overflow-wrap: break-word;
.change {
span {}
@@ -134,3 +137,15 @@ td#diff-col div {
background: var(--color-background);
box-shadow: 1px 1px 4px var(--color-shadow-jump);
}
// resets button margin to 0px
.pure-form button.reset-margin {
margin: 0px;
}
.diff-fieldset {
display: flex;
align-items: center;
gap: 4px;
flex-wrap: wrap;
}

View File

@@ -11,7 +11,22 @@ ul#requests-extra_browsers {
/* each proxy entry is a `table` */
table {
tr {
display: inline;
display: table-row; // default display for small screens
input[type=text] {
width: 100%;
}
}
}
// apply inline display for larger screens
@media only screen and (min-width: 1280px) {
table {
tr {
display: inline;
input[type=text] {
width: 100%;
}
}
}
}
}

View File

@@ -11,7 +11,19 @@ ul#requests-extra_proxies {
/* each proxy entry is a `table` */
table {
tr {
display: inline;
display: table-row; // default display for small screens
input[type=text] {
width: 100%;
}
}
}
// apply inline display for large screens
@media only screen and (min-width: 1024px) {
table {
tr {
display: inline;
}
}
}
}
@@ -25,15 +37,19 @@ ul#requests-extra_proxies {
body.proxy-check-active {
#request {
// Padding set by flex layout
/*
.proxy-status {
width: 2em;
}
*/
.proxy-check-details {
font-size: 80%;
color: #555;
display: block;
padding-left: 4em;
padding-left: 2em;
max-width: 500px;
}
.proxy-timing {

View File

@@ -147,8 +147,14 @@ body.spinner-active {
}
}
.tab-pane-inner {
// .tab-pane-inner will have the #id that the tab button jumps/anchors to
scroll-margin-top: 200px;
}
section.content {
padding-top: 5em;
padding-top: 100px;
padding-bottom: 1em;
flex-direction: column;
display: flex;
@@ -931,6 +937,7 @@ $form-edge-padding: 20px;
}
.tab-pane-inner {
&:not(:target) {
display: none;
}

View File

@@ -112,26 +112,34 @@ ul#requests-extra_proxies {
ul#requests-extra_proxies li > label {
display: none; }
ul#requests-extra_proxies table tr {
display: inline; }
display: table-row; }
ul#requests-extra_proxies table tr input[type=text] {
width: 100%; }
@media only screen and (min-width: 1024px) {
ul#requests-extra_proxies table tr {
display: inline; } }
#request {
/* Auto proxy scan/checker */ }
#request label[for=proxy] {
display: inline-block; }
body.proxy-check-active #request .proxy-status {
width: 2em; }
body.proxy-check-active #request .proxy-check-details {
font-size: 80%;
color: #555;
display: block;
padding-left: 4em; }
body.proxy-check-active #request .proxy-timing {
font-size: 80%;
padding-left: 1rem;
color: var(--color-link); }
body.proxy-check-active #request {
/*
.proxy-status {
width: 2em;
}
*/ }
body.proxy-check-active #request .proxy-check-details {
font-size: 80%;
color: #555;
display: block;
padding-left: 2em;
max-width: 500px; }
body.proxy-check-active #request .proxy-timing {
font-size: 80%;
padding-left: 1rem;
color: var(--color-link); }
#recommended-proxy {
display: grid;
@@ -158,7 +166,14 @@ ul#requests-extra_browsers {
ul#requests-extra_browsers li > label {
display: none; }
ul#requests-extra_browsers table tr {
display: inline; }
display: table-row; }
ul#requests-extra_browsers table tr input[type=text] {
width: 100%; }
@media only screen and (min-width: 1280px) {
ul#requests-extra_browsers table tr {
display: inline; }
ul#requests-extra_browsers table tr input[type=text] {
width: 100%; } }
#extra-browsers-setting {
border: 1px solid var(--color-grey-800);
@@ -602,8 +617,11 @@ body.spinner-active #pure-menu-horizontal-spinner {
background-color: var(--color-background-menu-link-hover);
color: var(--color-text-menu-link-hover); }
.tab-pane-inner {
scroll-margin-top: 200px; }
section.content {
padding-top: 5em;
padding-top: 100px;
padding-bottom: 1em;
flex-direction: column;
display: flex;

View File

@@ -4,6 +4,7 @@ from flask import (
flash
)
from .html_tools import TRANSLATE_WHITESPACE_TABLE
from . model import App, Watch
from copy import deepcopy, copy
from os import path, unlink
@@ -750,17 +751,17 @@ class ChangeDetectionStore:
def update_5(self):
# If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings
# In other words - the watch notification_title and notification_body are not needed if they are the same as the default one
current_system_body = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
current_system_title = self.data['settings']['application']['notification_title'].translate(str.maketrans('', '', "\r\n "))
current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
current_system_title = self.data['settings']['application']['notification_title'].translate(TRANSLATE_WHITESPACE_TABLE)
for uuid, watch in self.data['watching'].items():
try:
watch_body = watch.get('notification_body', '')
if watch_body and watch_body.translate(str.maketrans('', '', "\r\n ")) == current_system_body:
if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
# Looks the same as the default one, so unset it
watch['notification_body'] = None
watch_title = watch.get('notification_title', '')
if watch_title and watch_title.translate(str.maketrans('', '', "\r\n ")) == current_system_title:
if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
# Looks the same as the default one, so unset it
watch['notification_title'] = None
except Exception as e:

View File

@@ -70,7 +70,7 @@
<a href="{{ url_for('import_page')}}" class="pure-menu-link">IMPORT</a>
</li>
<li class="pure-menu-item">
<a href="{{ url_for('get_backup')}}" class="pure-menu-link">BACKUP</a>
<a href="{{ url_for('backups.index')}}" class="pure-menu-link">BACKUPS</a>
</li>
{% else %}
<li class="pure-menu-item">

View File

@@ -14,7 +14,7 @@
<div id="settings">
<form class="pure-form " action="" method="GET" id="diff-form">
<fieldset>
<fieldset class="diff-fieldset">
{% if versions|length >= 1 %}
<strong>Compare</strong>
<del class="change"><span>from</span></del>
@@ -33,7 +33,7 @@
</option>
{% endfor %}
</select>
<button type="submit" class="pure-button pure-button-primary">Go</button>
<button type="submit" class="pure-button pure-button-primary reset-margin">Go</button>
{% endif %}
</fieldset>
<fieldset>

View File

@@ -26,7 +26,6 @@
</script>
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
{% if playwright_enabled %}
@@ -66,8 +65,8 @@
<fieldset>
<div class="pure-control-group">
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
<div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
<div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
</div>
<div class="pure-control-group inline-radio">
{{ render_field(form.processor) }}
@@ -150,21 +149,24 @@
{{ render_field(form.method) }}
</div>
<div id="request-body">
{{ render_field(form.body, rows=5, placeholder="Example
{{ render_field(form.body, rows=7, placeholder="Example
{
\"name\":\"John\",
\"age\":30,
\"car\":null
\"car\":null,
\"year\":{% now 'Europe/Berlin', '%Y' %}
}") }}
</div>
<div class="pure-form-message">Variables are supported in the request body (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
</div>
</fieldset>
<!-- hmm -->
<div class="pure-control-group advanced-options" style="display: none;">
{{ render_field(form.headers, rows=5, placeholder="Example
{{ render_field(form.headers, rows=7, placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0") }}
User-Agent: wonderbra 1.0
Math: {{ 1 + 1 }}") }}
<div class="pure-form-message">Variables are supported in the request header values (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
<div class="pure-form-message-inline">
{% if has_extra_headers_file %}
<strong>Alert! Extra headers file found and will be added to this watch!</strong>
@@ -330,9 +332,9 @@ nav
{{ render_checkbox_field(form.filter_text_added) }}
{{ render_checkbox_field(form.filter_text_replaced) }}
{{ render_checkbox_field(form.filter_text_removed) }}
<span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span>
<span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
<span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br>
<span class="pure-form-message-inline">&nbsp;So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
<span class="pure-form-message-inline">&nbsp;When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
@@ -371,7 +373,7 @@ nav
") }}
<span class="pure-form-message-inline">
<ul>
<li>Matching text will be <strong>removed</strong> from the text snapshot</li>
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>

View File

@@ -172,7 +172,7 @@ nav
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
<span class="pure-form-message-inline">
<ul>
<li>Matching text will be <strong>removed</strong> from the text snapshot</li>
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
<li>Note: This is applied globally in addition to the per-watch rules.</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
@@ -276,7 +276,7 @@ nav
<div class="pure-control-group">
{{ render_button(form.save_button) }}
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
<a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a>
<a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
</div>
</div>
</form>

View File

@@ -0,0 +1,6 @@
# A list of real-world examples!
The price should always be 666.66 for our tests
see test_restock_itemprop.py::test_special_prop_examples

View File

@@ -0,0 +1,25 @@
<div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection"
data-testid="price-section"
data-optly-product-tile-price-section="true"><span
class="PriceRange ProductPrice variant-huge" itemprop="offers"
itemscope="" itemtype="http://schema.org/Offer"><div
class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span
aria-hidden="true" class="Price variant-huge" data-testid="price"
itemprop="price"><sup class="sup" data-testid="price-symbol"
itemprop="priceCurrency" content="AUD">$</sup><span
class="dollars" data-testid="price-value" itemprop="price"
content="155.55">155.55</span><span class="extras"><span class="sup"
data-testid="price-sup"></span></span></span></span>
</div>
<script type="application/ld+json">{
"@type": "Product",
"@context": "https://schema.org",
"name": "test",
"description": "test",
"offers": {
"@type": "Offer",
"priceCurrency": "AUD",
"price": 155.55
},
}</script>

View File

@@ -16,4 +16,4 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
)
assert b"1 Imported" in res.data
time.sleep(3)
wait_for_all_checks(client)

View File

@@ -1,7 +1,8 @@
#!/usr/bin/env python3
import json
import os
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def set_response():
@@ -18,7 +19,6 @@ def set_response():
f.write(data)
time.sleep(1)
def test_socks5(client, live_server, measure_memory_usage):
live_server_setup(live_server)
set_response()
@@ -79,3 +79,24 @@ def test_socks5(client, live_server, measure_memory_usage):
# Should see the proper string
assert "Awesome, you made it".encode('utf-8') in res.data
# PROXY CHECKER WIDGET CHECK - this needs more checking
uuid = extract_UUID_from_client(client)
res = client.get(
url_for("check_proxies.start_check", uuid=uuid),
follow_redirects=True
)
# It's probably already finished super fast :(
#assert b"RUNNING" in res.data
wait_for_all_checks(client)
res = client.get(
url_for("check_proxies.get_recheck_status", uuid=uuid),
follow_redirects=True
)
assert b"OK" in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -77,6 +77,8 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
# The trigger line is REMOVED, this should trigger
set_original(excluding='The golden line')
# Check in the processor here what's going on, it's triggering empty-reply and no change.
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
@@ -151,7 +153,6 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
# A line thats not the trigger should not trigger anything
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)
@@ -173,6 +174,5 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert b'-Oh yes please-' in response
assert '网站监测 内容更新了'.encode('utf-8') in response
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -26,8 +26,24 @@ def test_backup(client, live_server, measure_memory_usage):
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Launch the thread in the background to create the backup
res = client.get(
url_for("get_backup"),
url_for("backups.request_backup"),
follow_redirects=True
)
time.sleep(2)
res = client.get(
url_for("backups.index"),
follow_redirects=True
)
# Can see the download link to the backup
assert b'<a href="/backups/download/changedetection-backup-20' in res.data
assert b'Remove backups' in res.data
# Get the latest one
res = client.get(
url_for("backups.download_backup", filename="latest"),
follow_redirects=True
)
@@ -44,3 +60,11 @@ def test_backup(client, live_server, measure_memory_usage):
# Should be two txt files in the archive (history and the snapshot)
assert len(newlist) == 2
# Get the latest one
res = client.get(
url_for("backups.remove_backups"),
follow_redirects=True
)
assert b'No backups found.' in res.data
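The flow this test exercises: backups.request_backup starts the work in the background, backups.index lists only finished archives, and backups.remove_backups clears them. A sketch of the hide-while-running idea; the '.part' suffix and the archived filename are assumptions:

import os
import threading
import time
import zipfile

def create_backup(datastore_path, output_filename):
    tmp = output_filename + '.part'  # in-progress name, hidden from the listing
    with zipfile.ZipFile(tmp, 'w', zipfile.ZIP_DEFLATED) as z:
        z.write(os.path.join(datastore_path, 'url-watches.json'), 'url-watches.json')
    os.rename(tmp, output_filename)  # publish only once complete

threading.Thread(
    target=create_backup,
    args=('test-datastore', f'changedetection-backup-{int(time.time())}.zip'),
).start()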

View File

@@ -65,11 +65,8 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
live_server_setup(live_server)
# Use a mix of case in ZzZ to prove it works case-insensitive.
ignore_text = "out of stoCk\r\nfoobar"
set_original_ignore_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
@@ -127,13 +124,24 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
# 2548
# Going back to the ORIGINAL should NOT trigger a change
set_original_ignore_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Now we set a change where the text is gone, it should now trigger
# Now we set a change where the text is gone AND it's different content, it should now trigger
set_modified_response_minus_block_text()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -5,12 +5,41 @@ import time
from flask import url_for
from ..html_tools import *
from .util import live_server_setup
from .util import live_server_setup, wait_for_all_checks
def test_setup(live_server):
live_server_setup(live_server)
def set_response_with_multiple_index():
data= """<!DOCTYPE html>
<html>
<body>
<!-- NOTE!! CHROME WILL ADD TBODY HERE IF ITS NOT THERE!! -->
<table style="width:100%">
<tr>
<th>Person 1</th>
<th>Person 2</th>
<th>Person 3</th>
</tr>
<tr>
<td>Emil</td>
<td>Tobias</td>
<td>Linus</td>
</tr>
<tr>
<td>16</td>
<td>14</td>
<td>10</td>
</tr>
</table>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)
def set_original_response():
test_return_data = """<html>
@@ -119,12 +148,10 @@ across multiple lines
def test_element_removal_full(client, live_server, measure_memory_usage):
sleep_time_for_fetch_thread = 3
#live_server_setup(live_server)
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
@@ -132,7 +159,8 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(1)
wait_for_all_checks(client)
# Goto the edit page, add the filter data
# Not sure why \r needs to be added - without the #changetext this is not necessary
subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"
@@ -148,6 +176,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
follow_redirects=True,
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
# Check it saved
res = client.get(
@@ -156,10 +185,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# so that we set the state to 'unviewed' after all the edits
client.get(url_for("diff_history_page", uuid="first"))
@@ -168,11 +197,70 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
set_modified_response()
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# There should not be an unviewed change, as changes should be removed
res = client.get(url_for("index"))
assert b"unviewed" not in res.data
# Re #2752
def test_element_removal_nth_offset_no_shift(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_response_with_multiple_index()
subtractive_selectors_data = ["""
body > table > tr:nth-child(1) > th:nth-child(2)
body > table > tr:nth-child(2) > td:nth-child(2)
body > table > tr:nth-child(3) > td:nth-child(2)
body > table > tr:nth-child(1) > th:nth-child(3)
body > table > tr:nth-child(2) > td:nth-child(3)
body > table > tr:nth-child(3) > td:nth-child(3)""",
"""//body/table/tr[1]/th[2]
//body/table/tr[2]/td[2]
//body/table/tr[3]/td[2]
//body/table/tr[1]/th[3]
//body/table/tr[2]/td[3]
//body/table/tr[3]/td[3]"""]
for selector_list in subtractive_selectors_data:
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
res = client.post(
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
res = client.post(
url_for("edit_page", uuid="first"),
data={
"subtractive_selectors": selector_list,
"url": test_url,
"tags": "",
"fetch_backend": "html_requests",
},
follow_redirects=True,
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b"Tobias" not in res.data
assert b"Linus" not in res.data
assert b"Person 2" not in res.data
assert b"Person 3" not in res.data
# First column should exist
assert b"Emil" in res.data

View File

@@ -71,7 +71,7 @@ def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
def test_check_filter_multiline(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# live_server_setup(live_server)
set_multiline_response()
# Add our URL to the import page

View File

@@ -33,13 +33,17 @@ def test_strip_regex_text_func():
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert b"but 1 lines" in stripped_content
assert b"igNORe-cAse text" not in stripped_content
assert b"but 1234 lines" not in stripped_content
assert b"really" not in stripped_content
assert b"not this" not in stripped_content
assert "but 1 lines" in stripped_content
assert "igNORe-cAse text" not in stripped_content
assert "but 1234 lines" not in stripped_content
assert "really" not in stripped_content
assert "not this" not in stripped_content
# Check line number reporting
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers")
assert stripped_content == [2, 5, 6, 7, 8, 10]
# Check that linefeeds are preserved when there are no matching ignores
content = "some text\n\nand other text\n"
stripped_content = html_tools.strip_ignore_text(content, ignore_lines)
assert content == stripped_content

View File

@@ -22,10 +22,15 @@ def test_strip_text_func():
ignore_lines = ["sometimes"]
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert "sometimes" not in stripped_content
assert "Some content" in stripped_content
assert b"sometimes" not in stripped_content
assert b"Some content" in stripped_content
# Check that line feeds don't get chewed up when something is found
test_content = "Some initial text\n\nWhich is across multiple lines\n\nZZZZz\n\n\nSo let's see what happens."
ignore = ['something irrelevent but just to check', 'XXXXX', 'YYYYY', 'ZZZZZ']
stripped_content = html_tools.strip_ignore_text(test_content, ignore)
assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens."
def set_original_ignore_response():
test_return_data = """<html>
@@ -141,8 +146,6 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# Just to be sure.. set a regular modified change..
set_modified_original_ignore_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -153,17 +156,17 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
res = client.get(url_for("preview_page", uuid="first"))
# Should no longer be in the preview
assert b'new ignore stuff' not in res.data
# SHOULD BE in the preview, it was added in set_modified_original_ignore_response()
# and we have "new ignore stuff" in ignore_text
# it is only ignored, it is not removed (it will be highlighted too)
assert b'new ignore stuff' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# When adding some ignore text, it should not trigger a change, even if something else on that line changes
def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
# Give the endpoint time to spin up
time.sleep(1)
#live_server_setup(live_server)
ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
set_original_ignore_response()
@@ -172,6 +175,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
url_for("settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-global_ignore_text": ignore_text,
'application-fetch_backend': "html_requests"
},
@@ -192,9 +196,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Give the thread time to pick it up
wait_for_all_checks(client)
# Goto the edit page of the item, add our ignore text
# Add our URL to the import page
#Adding some ignore text should not trigger a change
res = client.post(
url_for("edit_page", uuid="first"),
data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"},
@@ -210,20 +212,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
# so that we are sure everything is viewed and in a known 'nothing changed' state
res = client.get(url_for("diff_history_page", uuid="first"))
# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
#####
# Make a change which includes the ignore text
# Make a change which includes the ignore text, it should be ignored and no 'change' triggered
# It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list
set_modified_ignore_response()
# Trigger a check
@@ -233,6 +230,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data

View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def set_response():
data = f"""<html>
<body>Awesome, you made it<br>
yeah the socks request worked<br>
something to ignore<br>
something to trigger<br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)
def test_content_filter_live_preview(client, live_server, measure_memory_usage):
live_server_setup(live_server)
set_response()
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tags": ''},
follow_redirects=True
)
uuid = extract_UUID_from_client(client)
res = client.post(
url_for("edit_page", uuid=uuid),
data={
"include_filters": "",
"fetch_backend": 'html_requests',
"ignore_text": "something to ignore",
"trigger_text": "something to trigger",
"url": test_url,
},
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
# The endpoint is a POST and accepts the form values to override the watch preview
import json
# DEFAULT OUTPUT WITHOUT ANYTHING UPDATED/CHANGED - SHOULD SEE THE WATCH DEFAULTS
res = client.post(
url_for("watch_get_preview_rendered", uuid=uuid)
)
default_return = json.loads(res.data.decode('utf-8'))
assert default_return.get('after_filter')
assert default_return.get('before_filter')
assert default_return.get('ignore_line_numbers') == [3] # "something to ignore" line 3
assert default_return.get('trigger_line_numbers') == [4] # "something to trigger" line 4
# SEND AN UPDATE AND WE SHOULD SEE THE OUTPUT CHANGE SO WE KNOW TO HIGHLIGHT NEW STUFF
res = client.post(
url_for("watch_get_preview_rendered", uuid=uuid),
data={
"include_filters": "",
"fetch_backend": 'html_requests',
"ignore_text": "sOckS", # Also be sure case insensitive works
"trigger_text": "AweSOme",
"url": test_url,
},
)
reply = json.loads(res.data.decode('utf-8'))
assert reply.get('after_filter')
assert reply.get('before_filter')
assert reply.get('ignore_line_numbers') == [2] # Ignored - "socks" on line 2
assert reply.get('trigger_line_numbers') == [1] # Triggers "Awesome" in line 1
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -284,7 +284,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
# CUSTOM JSON BODY CHECK for POST://
set_original_response()
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#header-manipulation
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123"
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123&+second=hello+world%20%22space%22"
res = client.post(
url_for("settings_page"),
@@ -326,6 +326,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
assert j['secret'] == 444
assert j['somebug'] == '网站监测 内容更新了'
# URL check, this will always be converted to lowercase
assert os.path.isfile("test-datastore/notification-url.txt")
with open("test-datastore/notification-url.txt", 'r') as f:
@@ -337,6 +338,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
with open("test-datastore/notification-headers.txt", 'r') as f:
notification_headers = f.read()
assert 'custom-header: 123' in notification_headers.lower()
assert 'second: hello world "space"' in notification_headers.lower()
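The header assertion relies on standard URL decoding of the post:// query string, where '+' and %-escapes become literal characters:

from urllib.parse import unquote_plus

assert unquote_plus('hello+world%20%22space%22') == 'hello world "space"'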
# Should always be automatically detected as JSON content type even when we set it as 'Text' (default)
@@ -429,3 +431,15 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
follow_redirects=True
)
######### Test global/system settings - When everything is deleted it should give a helpful error
# See #2727
res = client.post(
url_for("ajax_callback_send_notification_test")+"?mode=global-settings",
data={"notification_urls": test_notification_url},
follow_redirects=True
)
assert res.status_code == 400
assert b"Error: You must have atleast one watch configured for 'test notification' to work" in res.data

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
# `subtractive_selectors` should still work in `source:` type requests
def test_fetch_pdf(client, live_server, measure_memory_usage):
import shutil
shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")
live_server_setup(live_server)
test_url = url_for('test_pdf_endpoint', _external=True)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# PDF header should not be there (it was converted to text)
assert b'PDF' not in res.data[:10]
assert b'hello world' in res.data
# So we know if the file changes in other ways
import hashlib
original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
# We should have one
assert len(original_md5) > 0
# And it's going to be in the document
assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data
shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf")
changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
assert b'unviewed' in res.data
# The original checksum should not be here anymore (cdio adds it to the bottom of the text)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert original_md5.encode('utf-8') not in res.data
assert changed_md5.encode('utf-8') in res.data
res = client.get(
url_for("diff_history_page", uuid="first"),
follow_redirects=True
)
assert original_md5.encode('utf-8') in res.data
assert changed_md5.encode('utf-8') in res.data
assert b'here is a change' in res.data

View File

@@ -45,7 +45,7 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
"url": test_url,
"tags": "",
"fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
"headers": "xxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
"headers": "jinja2:{{ 1+1 }}\nxxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -61,6 +61,7 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
)
# Flask will convert the header key to uppercase
assert b"Jinja2:2" in res.data
assert b"Xxx:ooo" in res.data
assert b"Cool:yeah" in res.data
@@ -117,7 +118,8 @@ def test_body_in_request(client, live_server, measure_memory_usage):
wait_for_all_checks(client)
# Now the change which should trigger a change
body_value = 'Test Body Value'
body_value = 'Test Body Value {{ 1+1 }}'
body_value_formatted = 'Test Body Value 2'
res = client.post(
url_for("edit_page", uuid="first"),
data={
@@ -140,8 +142,9 @@ def test_body_in_request(client, live_server, measure_memory_usage):
# If this gets stuck something is wrong, something should always be there
assert b"No history found" not in res.data
# We should see what we sent in the reply
assert str.encode(body_value) in res.data
# We should see the formatted value of what we sent in the reply
assert str.encode(body_value) not in res.data
assert str.encode(body_value_formatted) in res.data
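Both body assertions come down to the request body (and header values) being rendered through Jinja2 before the request is sent. A minimal sketch using plain jinja2; note the {% now %} tag seen in the edit-form placeholder additionally needs a time extension such as jinja2-time:

from jinja2 import BaseLoader, Environment

def render(template_str):
    return Environment(loader=BaseLoader()).from_string(template_str).render()

assert render('Test Body Value {{ 1+1 }}') == 'Test Body Value 2'
assert render('{{ 1+1 }}') == '2'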
####### data sanity checks
# Add the test URL twice, we will check

View File

@@ -3,7 +3,7 @@ import os
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, extract_UUID_from_client
from ..notification import default_notification_format
instock_props = [
@@ -367,6 +367,12 @@ def test_change_with_notification_values(client, live_server):
assert "new price 1950.45" in notification
assert "title new price 1950.45" in notification
## Now test the "SEND TEST NOTIFICATION" is working
os.unlink("test-datastore/notification.txt")
uuid = extract_UUID_from_client(client)
res = client.post(url_for("ajax_callback_send_notification_test", watch_uuid=uuid), data={}, follow_redirects=True)
time.sleep(5)
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
def test_data_sanity(client, live_server):
@@ -413,3 +419,31 @@ def test_data_sanity(client, live_server):
res = client.get(
url_for("edit_page", uuid="first"))
assert test_url2.encode('utf-8') in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# All examples should give a price of 666.66
def test_special_prop_examples(client, live_server):
import glob
#live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True)
check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt")
files = glob.glob(check_path)
assert files
for test_example_filename in files:
with open(test_example_filename, 'r') as example_f:
with open("test-datastore/endpoint-content.txt", "w") as test_f:
test_f.write(f"<html><body>{example_f.read()}</body></html>")
# Now fetch it and check the price worked
client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True
)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'ception' not in res.data
assert b'155.55' in res.data

View File

@@ -61,10 +61,10 @@ def test_bad_access(client, live_server, measure_memory_usage):
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
def test_file_access(client, live_server, measure_memory_usage):
def test_file_slashslash_access(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_file_path = "/tmp/test-file.txt"
test_file_path = os.path.abspath(__file__)
# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
@@ -82,8 +82,30 @@ def test_file_access(client, live_server, measure_memory_usage):
follow_redirects=True
)
# Should see something (the test now watches its own source file)
assert b"Hello world" in res.data
assert b"test_file_slashslash_access" in res.data
else:
# Default should be here
assert b'file:// type access is denied for security reasons.' in res.data
def test_file_slash_access(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_file_path = os.path.abspath(__file__)
# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
url_for("form_quick_watch_add"),
data={"url": f"file:/{test_file_path}", "tags": ''},
follow_redirects=True
)
wait_for_all_checks(client)
res = client.get(url_for("index"))
# If it is enabled at test time
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
# So it should permit it, but it should fall back to the 'requests' library giving an error
# (but means it gets passed to playwright etc)
assert b"URLs with hostname components are not permitted" in res.data
else:
# Default should be here
assert b'file:// type access is denied for security reasons.' in res.data
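Both tests exercise the same guard: any file: URL form (file://, file:/) is refused unless ALLOW_FILE_URI is enabled. A hedged sketch; the function name and the env parsing are assumptions (the tests themselves use strtobool):

import os
import re

def file_uri_permitted(url):
    allow = os.getenv('ALLOW_FILE_URI', 'false').lower() in ('1', 'true', 'yes')
    return allow or not re.match(r'^file:', url.strip(), re.IGNORECASE)

# With ALLOW_FILE_URI unset, both single- and double-slash forms are denied
assert not file_uri_permitted('file:/etc/passwd')
assert not file_uri_permitted('file:///etc/passwd')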

View File

@@ -18,12 +18,13 @@ class TestDiffBuilder(unittest.TestCase):
watch['last_viewed'] = 110
watch.save_history_text(contents=b"hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))
# Contents from the browser/requests/etc are always returned as str; in Python, str is Unicode text (bytes is the type for raw bytes)
watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))
p = watch.get_next_snapshot_key_to_last_viewed
assert p == "112", "Correct last-viewed timestamp was detected"

View File

@@ -81,7 +81,8 @@ class update_worker(threading.Thread):
'watch_url': watch.get('url') if watch else None,
})
n_object.update(watch.extra_notification_token_values())
if watch:
n_object.update(watch.extra_notification_token_values())
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
logger.debug("Queued notification for sending")
@@ -260,9 +261,6 @@ class update_worker(threading.Thread):
try:
# Processor is what we are using for detecting the "Change"
processor = watch.get('processor', 'text_json_diff')
# Abort processing when the content was the same as the last fetch
skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
# Init a new 'difference_detection_processor', first look in processors
processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -278,16 +276,13 @@ class update_worker(threading.Thread):
update_handler.call_browser()
changed_detected, update_obj, contents = update_handler.run_changedetection(
watch=watch,
skip_when_checksum_same=skip_when_same_checksum,
)
changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc
if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
# if not isinstance(contents, (bytes, bytearray)):
# raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e:
logger.critical(f"File permission error updating file, watch: {uuid}")
logger.critical(str(e))

View File

@@ -74,7 +74,7 @@ services:
# If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
# and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used)
# depends_on:
# playwright-chrome:
# sockpuppetbrowser:
# condition: service_started

View File

@@ -59,7 +59,9 @@ elementpath==4.1.5
selenium~=4.14.0
werkzeug~=3.0
# https://github.com/pallets/werkzeug/issues/2985
# Maybe related to pytest?
werkzeug==3.0.6
# Templating, so far just in the URLs but in the future can be for the notifications also
jinja2~=3.1