Adding <button>

2025-11-09 19:17:15 +00:00 · 2024-10-07 16:08:40 +02:00
46 changed files with 432 additions and 833 deletions
--- a/1
+++ b/1
@@ -37,7 +37,6 @@ RUN pip install --target=/dependencies playwright~=1.41.2 \

 # Final image stage
 FROM python:${PYTHON_VERSION}-slim-bookworm
-LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"

 RUN apt-get update && apt-get install -y --no-install-recommends \
    libxslt1.1 \
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,4 @@
 recursive-include changedetectionio/api *
-recursive-include changedetectionio/apprise_plugin *
 recursive-include changedetectionio/blueprint *
 recursive-include changedetectionio/content_fetchers *
 recursive-include changedetectionio/model *
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.47.03'
+__version__ = '0.46.04'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -58,7 +58,7 @@ class Watch(Resource):
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.args.get('recheck'):
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return "OK", 200
        if request.args.get('paused', '') == 'paused':
            self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ class CreateWatch(Resource):

        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
        if new_uuid:
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
            return {'uuid': new_uuid}, 201
        else:
            return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ class CreateWatch(Resource):

        if request.args.get('recheck_all'):
            for uuid in self.datastore.data['watching'].keys():
-                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return {'status': "OK"}, 200

        return list, 200
--- a/changedetectionio/apprise_plugin/init.py
+++ b/changedetectionio/apprise_plugin/init.py
@@ -1,6 +1,5 @@
 # include the decorator
 from apprise.decorators import notify
-from loguru import logger

@notify(on="delete")
@notify(on="deletes")
@@ -65,12 +64,10 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
            auth = (URLBase.unquote(results.get('user')))

    # Try to auto-guess if it's JSON
-    h = 'application/json; charset=utf-8'
    try:
        json.loads(body)
-        headers['Content-Type'] = h
+        headers['Content-Type'] = 'application/json; charset=utf-8'
    except ValueError as e:
-        logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}")
        pass

    r(results.get('url'),
--- a/changedetectionio/blueprint/check_proxies/init.py
+++ b/changedetectionio/blueprint/check_proxies/init.py
@@ -1,7 +1,4 @@
-import importlib
 from concurrent.futures import ThreadPoolExecutor
-
-from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 from changedetectionio.store import ChangeDetectionStore

 from functools import wraps
@@ -33,6 +30,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def long_task(uuid, preferred_proxy):
        import time
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
+        from changedetectionio.processors.text_json_diff import text_json_diff
        from changedetectionio.safe_jinja import render as jinja_render

        status = {'status': '', 'length': 0, 'text': ''}
@@ -40,12 +38,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        contents = ''
        now = time.time()
        try:
-            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
-            update_handler = processor_module.perform_site_check(datastore=datastore,
-                                                                 watch_uuid=uuid
-                                                                 )
-
-            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
+            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
+            update_handler.call_browser()
        # title, size is len contents not len xfer
        except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
@@ -54,7 +48,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
-        except FilterNotFoundInResponse:
+        except text_json_diff.FilterNotFoundInResponse:
            status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
        except content_fetcher_exceptions.EmptyReply as e:
            if e.status_code == 403 or e.status_code == 401:
--- a/changedetectionio/blueprint/price_data_follower/init.py
+++ b/changedetectionio/blueprint/price_data_follower/init.py
@@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        datastore.data['watching'][uuid]['processor'] = 'restock_diff'
        datastore.data['watching'][uuid].clear_watch()
-        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
        return redirect(url_for("index"))

    @login_required
--- a/changedetectionio/blueprint/tags/templates/edit-tag.html
+++ b/changedetectionio/blueprint/tags/templates/edit-tag.html
@@ -17,6 +17,7 @@
 </script>

 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
+<!--<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>-->
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>

 <div class="edit-form monospaced-textarea">
--- a/changedetectionio/content_fetchers/requests.py
+++ b/changedetectionio/content_fetchers/requests.py
@@ -75,7 +75,6 @@ class fetcher(Fetcher):
        self.headers = r.headers

        if not r.content or not len(r.content):
-            logger.debug(f"Requests returned empty content for '{url}'")
            if not empty_pages_are_a_change:
                raise EmptyReply(url=url, status_code=r.status_code)
            else:
--- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js
+++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js
@@ -154,14 +154,10 @@ function isItemInStock() {
        }

        elementText = "";
-        try {
-            if (element.tagName.toLowerCase() === "input") {
-                elementText = element.value.toLowerCase().trim();
-            } else {
-                elementText = getElementBaseText(element);
-            }
-        } catch (e) {
-            console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
+        if (element.tagName.toLowerCase() === "input") {
+            elementText = element.value.toLowerCase().trim();
+        } else {
+            elementText = getElementBaseText(element);
        }

        if (elementText.length) {
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3

 import datetime
+import importlib

 import flask_login
 import locale
@@ -11,7 +12,9 @@ import threading
 import time
 import timeago

+from .content_fetchers.exceptions import ReplyWithContentButNoText
 from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
+from .processors.text_json_diff.processor import FilterNotFoundInResponse
 from .safe_jinja import render as jinja_render
 from changedetectionio.strtobool import strtobool
 from copy import deepcopy
@@ -788,6 +791,7 @@ def changedetection_app(config=None, datastore_o=None):
            # Recast it if need be to right data Watch handler
            watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
            datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])
+
            flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")

            # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
@@ -795,7 +799,7 @@ def changedetection_app(config=None, datastore_o=None):
            datastore.needs_write_urgent = True

            # Queue the watch for immediate recheck, with a higher priority
-            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))

            # Diff page [edit] link should go back to diff page
            if request.args.get("next") and request.args.get("next") == 'diff':
@@ -976,7 +980,7 @@ def changedetection_app(config=None, datastore_o=None):
                importer = import_url_list()
                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
                for uuid in importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                if len(importer.remaining_data) == 0:
                    return redirect(url_for('index'))
@@ -989,7 +993,7 @@ def changedetection_app(config=None, datastore_o=None):
                d_importer = import_distill_io_json()
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                for uuid in d_importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

            # XLSX importer
            if request.files and request.files.get('xlsx_file'):
@@ -1013,7 +1017,7 @@ def changedetection_app(config=None, datastore_o=None):
                    w_importer.run(data=file, flash=flash, datastore=datastore)

                for uuid in w_importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

        # Could be some remaining, or we could be on GET
        form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@@ -1377,9 +1381,78 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
    @login_optionally_required
    def watch_get_preview_rendered(uuid):
        '''For when viewing the "preview" of the rendered text from inside of Edit'''
+        from flask import jsonify
-        from .processors.text_json_diff import prepare_filter_prevew
-        return prepare_filter_prevew(watch_uuid=uuid, datastore=datastore)
+        now = time.time()
+        import brotli
+        from . import forms
+
+        text_after_filter = ''
+        tmp_watch = deepcopy(datastore.data['watching'].get(uuid))
+
+        if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
+            # Splice in the temporary stuff from the form
+            form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
+                                                       data=request.form
+                                                       )
+            # Only update vars that came in via the AJAX post
+            p = {k: v for k, v in form.data.items() if k in request.form.keys()}
+            tmp_watch.update(p)
+
+            latest_filename = next(reversed(tmp_watch.history))
+            html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
+            with open(html_fname, 'rb') as f:
+                decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
+
+                # Just like a normal change detection except provide a fake "watch" object and don't call .call_browser()
+                processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
+                update_handler = processor_module.perform_site_check(datastore=datastore,
+                                                                     watch_uuid=uuid # probably not needed anymore anyway?
+                                                                     )
+                # Use the last loaded HTML as the input
+                update_handler.fetcher.content = decompressed_data
+                update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
+                try:
+                    changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
+                        watch=tmp_watch,
+                        skip_when_checksum_same=False,
+                    )
+                except FilterNotFoundInResponse as e:
+                    text_after_filter = f"Filter not found in HTML: {str(e)}"
+                except ReplyWithContentButNoText as e:
+                    text_after_filter = f"Filter found but no text (empty result)"
+                except Exception as e:
+                    text_after_filter = f"Error: {str(e)}"
+
+            if not text_after_filter.strip():
+                text_after_filter = 'Empty content'
+
+        # because run_changedetection always returns bytes due to saving the snapshots etc
+        text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter
+
+        do_anchor = datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
+
+        trigger_line_numbers = []
+        try:
+            text_before_filter = html_tools.html_to_text(html_content=decompressed_data,
+                                                         render_anchor_tag_content=do_anchor)
+
+            trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
+                                                                wordlist=tmp_watch['trigger_text'],
+                                                                mode='line numbers'
+                                                                )
+        except Exception as e:
+            text_before_filter = f"Error: {str(e)}"
+
+        logger.trace(f"Parsed in {time.time() - now:.3f}s")
+
+        return jsonify(
+            {
+                'after_filter': text_after_filter,
+                'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
+                'trigger_line_numbers': trigger_line_numbers
+            }
+        )


    @app.route("/form/add/quickwatch", methods=['POST'])
@@ -1442,7 +1515,7 @@ def changedetection_app(config=None, datastore_o=None):
        new_uuid = datastore.clone(uuid)
        if new_uuid:
            if not datastore.data['watching'].get(uuid).get('paused'):
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
            flash('Cloned.')

        return redirect(url_for('index'))
@@ -1463,7 +1536,7 @@ def changedetection_app(config=None, datastore_o=None):

        if uuid:
            if uuid not in running_uuids:
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
            i = 1

        elif tag:
@@ -1474,7 +1547,7 @@ def changedetection_app(config=None, datastore_o=None):
                        continue
                    if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                        update_q.put(
-                            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
+                            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
                        )
                        i += 1

@@ -1484,8 +1557,9 @@ def changedetection_app(config=None, datastore_o=None):
                if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                    if with_errors and not watch.get('last_error'):
                        continue
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                    i += 1
+
        flash(f"{i} watches queued for rechecking.")
        return redirect(url_for('index', tag=tag))

@@ -1542,7 +1616,7 @@ def changedetection_app(config=None, datastore_o=None):
                uuid = uuid.strip()
                if datastore.data['watching'].get(uuid):
                    # Recheck and require a full reprocessing
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
            flash("{} watches queued for rechecking".format(len(uuids)))

        elif (op == 'clear-errors'):
@@ -1866,7 +1940,7 @@ def ticker_thread_check_time_launch_checks():
                        f"{now - watch['last_checked']:0.2f}s since last checked")

                    # Into the queue with you
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                    # Reset for next time
                    watch.jitter_seconds = 0
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -476,7 +476,7 @@ class processor_text_json_diff_form(commonSettingsForm):

    title = StringField('Title', default='')

-    ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()])
+    ignore_text = StringListField('Remove lines containing', [ValidateListRegex()])
    headers = StringDictKeyValue('Request headers')
    body = TextAreaField('Request body', [validators.Optional()])
    method = SelectField('Request method', choices=valid_method, default=default_method)
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -3,11 +3,11 @@ from lxml import etree
 import json
 import re

+
 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
 TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
-TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
-PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'

+PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
 # 'price' , 'lowPrice', 'highPrice' are usually under here
 # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
 LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -326,7 +326,6 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
 #          - "line numbers" return a list of line numbers that match (int list)
 #
 # wordlist - list of regex's (str) or words (str)
-# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
 def strip_ignore_text(content, wordlist, mode="content"):
    i = 0
    output = []
@@ -342,30 +341,32 @@ def strip_ignore_text(content, wordlist, mode="content"):
        else:
            ignore_text.append(k.strip())

-    for line in content.splitlines(keepends=True):
+    for line in content.splitlines():
        i += 1
        # Always ignore blank lines in this mode. (when this function gets called)
        got_match = False
-        for l in ignore_text:
-            if l.lower() in line.lower():
-                got_match = True
-
-        if not got_match:
-            for r in ignore_regex:
-                if r.search(line):
+        if len(line.strip()):
+            for l in ignore_text:
+                if l.lower() in line.lower():
                    got_match = True

-        if not got_match:
-            # Not ignored, and should preserve "keepends"
-            output.append(line)
-        else:
-            ignored_line_numbers.append(i)
+            if not got_match:
+                for r in ignore_regex:
+                    if r.search(line):
+                        got_match = True
+
+            if not got_match:
+                # Not ignored
+                output.append(line.encode('utf8'))
+            else:
+                ignored_line_numbers.append(i)
+

    # Used for finding out what to highlight
    if mode == "line numbers":
        return ignored_line_numbers

-    return ''.join(output)
+    return "\n".encode('utf8').join(output)

 def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
    from xml.sax.saxutils import escape as xml_escape
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -6,8 +6,6 @@ import re
 from pathlib import Path
 from loguru import logger

-from ..html_tools import TRANSLATE_WHITESPACE_TABLE
-
 # Allowable protocols, protects against javascript: etc
 # file:// is further checked by ALLOW_FILE_URI
 SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
@@ -38,9 +36,8 @@ class model(watch_base):
    jitter_seconds = 0

    def __init__(self, *arg, **kw):
-        self.__datastore_path = kw.get('datastore_path')
-        if kw.get('datastore_path'):
-            del kw['datastore_path']
+        self.__datastore_path = kw['datastore_path']
+        del kw['datastore_path']
        super(model, self).__init__(*arg, **kw)
        if kw.get('default'):
            self.update(kw['default'])
@@ -174,10 +171,6 @@ class model(watch_base):
        """
        tmp_history = {}

-        # In the case we are only using the watch for processing without history
-        if not self.watch_data_dir:
-            return []
-
        # Read the history file as a dict
        fname = os.path.join(self.watch_data_dir, "history.txt")
        if os.path.isfile(fname):
@@ -314,13 +307,13 @@ class model(watch_base):
            dest = os.path.join(self.watch_data_dir, snapshot_fname)
            if not os.path.exists(dest):
                with open(dest, 'wb') as f:
-                    f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
+                    f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
        else:
            snapshot_fname = f"{snapshot_id}.txt"
            dest = os.path.join(self.watch_data_dir, snapshot_fname)
            if not os.path.exists(dest):
                with open(dest, 'wb') as f:
-                    f.write(contents.encode('utf-8'))
+                    f.write(contents)

        # Append to index
        # @todo check last char was \n
@@ -352,32 +345,14 @@ class model(watch_base):
        return seconds

    # Iterate over all history texts and see if something new exists
-    # Always applying .strip() to start/end but optionally replace any other whitespace
-    def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
-        local_lines = []
-        if lines:
-            if ignore_whitespace:
-                if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
-                    local_lines = set([l.translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
-                else:
-                    local_lines = set([l.decode('utf-8').translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
-            else:
-                if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
-                    local_lines = set([l.strip().lower() for l in lines])
-                else:
-                    local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
-
+    def lines_contain_something_unique_compared_to_history(self, lines: list):
+        local_lines = set([l.decode('utf-8').strip().lower() for l in lines])

        # Compare each lines (set) against each history text file (set) looking for something new..
        existing_history = set({})
        for k, v in self.history.items():
            content = self.get_history_snapshot(k)
-
-            if ignore_whitespace:
-                alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
-            else:
-                alist = set([line.strip().lower() for line in content.splitlines()])
-
+            alist = set([line.strip().lower() for line in content.splitlines()])
            existing_history = existing_history.union(alist)

        # Check that everything in local_lines(new stuff) already exists in existing_history - it should
@@ -421,8 +396,8 @@ class model(watch_base):
    @property
    def watch_data_dir(self):
        # The base dir of the watch data
-        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
-
+        return os.path.join(self.__datastore_path, self['uuid'])
+    
    def get_error_text(self):
        """Return the text saved from a previous request that resulted in a non-200 error"""
        fname = os.path.join(self.watch_data_dir, "last-error.txt")
--- a/changedetectionio/processors/init.py
+++ b/changedetectionio/processors/init.py
@@ -18,7 +18,6 @@ class difference_detection_processor():
    screenshot = None
    watch = None
    xpath_data = None
-    preferred_proxy = None

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        super().__init__(*args, **kwargs)
@@ -27,8 +26,7 @@ class difference_detection_processor():
        # Generic fetcher that should be extended (requests, playwright etc)
        self.fetcher = Fetcher()

-    def call_browser(self, preferred_proxy_id=None):
-
+    def call_browser(self):
        from requests.structures import CaseInsensitiveDict

        # Protect against file:// access
@@ -44,7 +42,7 @@ class difference_detection_processor():
        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
-        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
+        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
@@ -157,7 +155,7 @@ class difference_detection_processor():
        # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -27,27 +27,22 @@ def _search_prop_by_value(matches, value):
                return prop[1]  # Yield the desired value and exit the function

 def _deduplicate_prices(data):
-    import re
+    seen = set()
+    unique_data = []

-    '''
-    Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159"
-    Get all the values, clean it and add it to a set then return the unique values
-    '''
-    unique_data = set()
-
-    # Return the complete 'datum' where its price was not seen before
    for datum in data:
+        # Convert 'value' to float if it can be a numeric string, otherwise leave it as is
+        try:
+            normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
+        except ValueError:
+            normalized_value = datum.value

-        if isinstance(datum.value, list):
-            # Process each item in the list
-            normalized_value = set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value])
-            unique_data.update(normalized_value)
-        else:
-            # Process single value
-            v = float(re.sub(r'[^\d.]', '', str(datum.value)))
-            unique_data.add(v)
-
-    return list(unique_data)
+        # If the normalized value hasn't been seen yet, add it to unique data
+        if normalized_value not in seen:
+            unique_data.append(datum)
+            seen.add(normalized_value)
+    
+    return unique_data


 # should return Restock()
@@ -88,13 +83,14 @@ def get_itemprop_availability(html_content) -> Restock:
        if price_result:
            # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and
            # parse that for the UI?
-            if len(price_result) > 1 and len(price_result) > 1:
+            prices_found = set(str(item.value).replace('$', '') for item in price_result)
+            if len(price_result) > 1 and len(prices_found) > 1:
                # See of all prices are different, in the case that one product has many embedded data types with the same price
                # One might have $121.95 and another 121.95 etc
-                logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.")
+                logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
                raise MoreThanOnePriceFound()

-            value['price'] = price_result[0]
+            value['price'] = price_result[0].value

        pricecurrency_result = pricecurrency_parse.find(data)
        if pricecurrency_result:
@@ -144,7 +140,7 @@ class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None

-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
        import hashlib

        if not watch:
@@ -224,7 +220,7 @@ class perform_site_check(difference_detection_processor):
            itemprop_availability['original_price'] = itemprop_availability.get('price')
            update_obj['restock']["original_price"] = itemprop_availability.get('price')

-        if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
+        if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
            raise ProcessorException(
                message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
                url=watch.get('url'),
@@ -241,14 +237,6 @@ class perform_site_check(difference_detection_processor):
            update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.")

-        # Very often websites will lie about the 'availability' in the metadata, so if the scraped version says its NOT in stock, use that.
-        if self.fetcher.instock_data and self.fetcher.instock_data != 'Possibly in stock':
-            if update_obj['restock'].get('in_stock'):
-                logger.warning(
-                    f"Lie detected in the availability machine data!! when scraping said its not in stock!! itemprop was '{itemprop_availability}' and scraped from browser was '{self.fetcher.instock_data}' update obj was {update_obj['restock']} ")
-                logger.warning(f"Setting instock to FALSE, scraper found '{self.fetcher.instock_data}' in the body but metadata reported not-in-stock")
-                update_obj['restock']["in_stock"] = False
-
        # What we store in the snapshot
        price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
        snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}"
@@ -311,4 +299,4 @@ class perform_site_check(difference_detection_processor):
        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5

-        return changed_detected, update_obj, snapshot_content.strip()
+        return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
--- a/changedetectionio/processors/text_json_diff/init.py
+++ b/changedetectionio/processors/text_json_diff/init.py
@@ -1,115 +0,0 @@
-
-from loguru import logger
-
-
-
-def _task(watch, update_handler):
-    from changedetectionio.content_fetchers.exceptions import ReplyWithContentButNoText
-    from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
-
-    text_after_filter = ''
-
-    try:
-        # The slow process (we run 2 of these in parallel)
-        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
-    except FilterNotFoundInResponse as e:
-        text_after_filter = f"Filter not found in HTML: {str(e)}"
-    except ReplyWithContentButNoText as e:
-        text_after_filter = f"Filter found but no text (empty result)"
-    except Exception as e:
-        text_after_filter = f"Error: {str(e)}"
-
-    if not text_after_filter.strip():
-        text_after_filter = 'Empty content'
-
-    # because run_changedetection always returns bytes due to saving the snapshots etc
-    text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter
-
-    return text_after_filter
-
-
-def prepare_filter_prevew(datastore, watch_uuid):
-    '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
-    from changedetectionio import forms, html_tools
-    from changedetectionio.model.Watch import model as watch_model
-    from concurrent.futures import ProcessPoolExecutor
-    from copy import deepcopy
-    from flask import request, jsonify
-    import brotli
-    import importlib
-    import os
-    import time
-    now = time.time()
-
-    text_after_filter = ''
-    text_before_filter = ''
-    trigger_line_numbers = []
-    ignore_line_numbers = []
-
-    tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
-
-    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
-        # Splice in the temporary stuff from the form
-        form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
-                                                   data=request.form
-                                                   )
-
-        # Only update vars that came in via the AJAX post
-        p = {k: v for k, v in form.data.items() if k in request.form.keys()}
-        tmp_watch.update(p)
-        blank_watch_no_filters = watch_model()
-        blank_watch_no_filters['url'] = tmp_watch.get('url')
-
-        latest_filename = next(reversed(tmp_watch.history))
-        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
-        with open(html_fname, 'rb') as f:
-            decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
-
-            # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
-            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
-            update_handler = processor_module.perform_site_check(datastore=datastore,
-                                                                 watch_uuid=tmp_watch.get('uuid')  # probably not needed anymore anyway?
-                                                                 )
-            # Use the last loaded HTML as the input
-            update_handler.datastore = datastore
-            update_handler.fetcher.content = str(decompressed_data) # str() because playwright/puppeteer/requests return string
-            update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
-
-            # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
-            # Do this as a parallel process because it could take some time
-            with ProcessPoolExecutor(max_workers=2) as executor:
-                future1 = executor.submit(_task, tmp_watch, update_handler)
-                future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
-
-                text_after_filter = future1.result()
-                text_before_filter = future2.result()
-
-    try:
-        trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
-                                                            wordlist=tmp_watch['trigger_text'],
-                                                            mode='line numbers'
-                                                            )
-    except Exception as e:
-        text_before_filter = f"Error: {str(e)}"
-
-    try:
-        text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', [])
-        ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
-                                                           wordlist=text_to_ignore,
-                                                           mode='line numbers'
-                                                           )
-    except Exception as e:
-        text_before_filter = f"Error: {str(e)}"
-
-    logger.trace(f"Parsed in {time.time() - now:.3f}s")
-
-    return jsonify(
-        {
-            'after_filter': text_after_filter,
-            'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
-            'duration': time.time() - now,
-            'trigger_line_numbers': trigger_line_numbers,
-            'ignore_line_numbers': ignore_line_numbers,
-        }
-    )
-
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -7,7 +7,7 @@ import re
 import urllib3

 from changedetectionio.processors import difference_detection_processor
-from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
+from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
 from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from loguru import logger
@@ -35,7 +35,8 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
+
        changed_detected = False
        html_content = ""
        screenshot = False  # as bytes
@@ -58,6 +59,9 @@ class perform_site_check(difference_detection_processor):
        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
        # Saves a lot of CPU
        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
+        if skip_when_checksum_same:
+            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
+                raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()

        # Fetching complete, now filters

@@ -201,14 +205,22 @@ class perform_site_check(difference_detection_processor):
        if watch.get('trim_text_whitespace'):
            stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())

+        if watch.get('remove_duplicate_lines'):
+            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
+
+        if watch.get('sort_text_alphabetically'):
+            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
+            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
+            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
+            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
+
        # Re #340 - return the content before the 'ignore text' was applied
        # Also used to calculate/show what was removed
-        text_content_before_ignored_filter = stripped_text_from_html
+        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

        # @todo whitespace coming from missing rtrim()?
        # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
        # Rewrite's the processing text based on only what diff result they want to see
-
        if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
            # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
            from changedetectionio import diff
@@ -223,12 +235,12 @@ class perform_site_check(difference_detection_processor):
                                             line_feed_sep="\n",
                                             include_change_type_prefix=False)

-            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))
+            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)

            if not rendered_diff and stripped_text_from_html:
                # We had some content, but no differences were found
                # Store our new file as the MD5 so it will trigger in the future
-                c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
+                c = hashlib.md5(stripped_text_from_html.encode('utf-8').translate(None, b'\r\n\t ')).hexdigest()
                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
            else:
                stripped_text_from_html = rendered_diff
@@ -249,6 +261,14 @@ class perform_site_check(difference_detection_processor):

        update_obj["last_check_status"] = self.fetcher.get_last_status_code()

+        # If there's text to skip
+        # @todo we could abstract out the get_text() to handle this cleaner
+        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
+        if len(text_to_ignore):
+            stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
+        else:
+            stripped_text_from_html = stripped_text_from_html.encode('utf8')
+
        # 615 Extract text by regex
        extract_text = watch.get('extract_text', [])
        if len(extract_text) > 0:
@@ -257,53 +277,39 @@ class perform_site_check(difference_detection_processor):
                # incase they specified something in '/.../x'
                if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
                    regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
-                    result = re.findall(regex, stripped_text_from_html)
+                    result = re.findall(regex.encode('utf-8'), stripped_text_from_html)

                    for l in result:
                        if type(l) is tuple:
                            # @todo - some formatter option default (between groups)
-                            regex_matched_output += list(l) + ['\n']
+                            regex_matched_output += list(l) + [b'\n']
                        else:
                            # @todo - some formatter option default (between each ungrouped result)
-                            regex_matched_output += [l] + ['\n']
+                            regex_matched_output += [l] + [b'\n']
                else:
                    # Doesnt look like regex, just hunt for plaintext and return that which matches
                    # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
-                    r = re.compile(re.escape(s_re), re.IGNORECASE)
+                    r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE)
                    res = r.findall(stripped_text_from_html)
                    if res:
                        for match in res:
-                            regex_matched_output += [match] + ['\n']
+                            regex_matched_output += [match] + [b'\n']

            ##########################################################
-            stripped_text_from_html = ''
-
+            stripped_text_from_html = b''
+            text_content_before_ignored_filter = b''
            if regex_matched_output:
                # @todo some formatter for presentation?
-                stripped_text_from_html = ''.join(regex_matched_output)
-
-        if watch.get('remove_duplicate_lines'):
-            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
+                stripped_text_from_html = b''.join(regex_matched_output)
+                text_content_before_ignored_filter = stripped_text_from_html


-        if watch.get('sort_text_alphabetically'):
-            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
-            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
-            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
-            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
-
-### CALCULATE MD5
-        # If there's text to ignore
-        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
-        text_for_checksuming = stripped_text_from_html
-        if text_to_ignore:
-            text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)

        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
-        if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
-            fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
+        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
+            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
        else:
-            fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()
+            fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()

        ############ Blocking rules, after checksum #################
        blocked = False
@@ -343,13 +349,7 @@ class perform_site_check(difference_detection_processor):

        if changed_detected:
            if watch.get('check_unique_lines', False):
-                ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
-
-                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
-                    lines=stripped_text_from_html.splitlines(),
-                    ignore_whitespace=ignore_whitespace
-                )
-
+                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
                # One or more lines? unsure?
                if not has_unique_lines:
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
--- a/changedetectionio/static/js/limit.js
+++ b/changedetectionio/static/js/limit.js
@@ -0,0 +1,56 @@
+/**
+ * debounce
+ * @param {integer} milliseconds This param indicates the number of milliseconds
+ *     to wait after the last call before calling the original function.
+ * @param {object} [context] What "this" refers to in the returned function; defaults to the caller's "this".
+ * @return {function} This returns a function that when called will wait the
+ *     indicated number of milliseconds after the last call before
+ *     calling the original function.
+ */
+Function.prototype.debounce = function (milliseconds, context) {
+    var baseFunction = this,
+        timer = null,
+        wait = milliseconds;
+
+    return function () {
+        var self = context || this,
+            args = arguments;
+
+        function complete() {
+            baseFunction.apply(self, args);
+            timer = null;
+        }
+
+        if (timer) {
+            clearTimeout(timer);
+        }
+
+        timer = setTimeout(complete, wait);
+    };
+};
+
+/**
+* throttle
+* @param {integer} milliseconds This param indicates the number of milliseconds
+*     to wait between calls before calling the original function.
+* @param {object} [context] What "this" refers to in the returned function; defaults to the caller's "this".
+* @return {function} This returns a function that calls the original function
+*     immediately, then ignores further calls until the indicated number of
+*     milliseconds has passed since the last call that went through.
+*/
+Function.prototype.throttle = function (milliseconds, context) {
+    var baseFunction = this,
+        lastEventTimestamp = null,
+        limit = milliseconds;
+
+    return function () {
+        var self = context || this,
+            args = arguments,
+            now = Date.now();
+
+        if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
+            lastEventTimestamp = now;
+            baseFunction.apply(self, args);
+        }
+    };
+};
--- a/changedetectionio/static/js/plugins.js
+++ b/changedetectionio/static/js/plugins.js
@@ -1,106 +1,64 @@
-(function ($) {
-    /**
-     * debounce
-     * @param {integer} milliseconds This param indicates the number of milliseconds
-     *     to wait after the last call before calling the original function.
-     * @param {object} What "this" refers to in the returned function.
-     * @return {function} This returns a function that when called will wait the
-     *     indicated number of milliseconds after the last call before
-     *     calling the original function.
-     */
-    Function.prototype.debounce = function (milliseconds, context) {
-        var baseFunction = this,
-            timer = null,
-            wait = milliseconds;
+(function($) {

-        return function () {
-            var self = context || this,
-                args = arguments;
+/*
+    $('#code-block').highlightLines([
+      {
+        'color': '#dd0000',
+        'lines': [10, 12]
+      },
+      {
+        'color': '#ee0000',
+        'lines': [15, 18]
+      }
+    ]);
+    Each number in 'lines' is an individual line number (starting at 1), not a range.
+*/

-            function complete() {
-                baseFunction.apply(self, args);
-                timer = null;
-            }
+  $.fn.highlightLines = function(configurations) {
+    return this.each(function() {
+      const $pre = $(this);
+      const textContent = $pre.text();
+      const lines = textContent.split(/\r?\n/); // Handles both \n and \r\n line endings

-            if (timer) {
-                clearTimeout(timer);
-            }
+      // Build a map of line numbers to styles
+      const lineStyles = {};

-            timer = setTimeout(complete, wait);
-        };
-    };
-
-    /**
-     * throttle
-     * @param {integer} milliseconds This param indicates the number of milliseconds
-     *     to wait between calls before calling the original function.
-     * @param {object} What "this" refers to in the returned function.
-     * @return {function} This returns a function that when called will wait the
-     *     indicated number of milliseconds between calls before
-     *     calling the original function.
-     */
-    Function.prototype.throttle = function (milliseconds, context) {
-        var baseFunction = this,
-            lastEventTimestamp = null,
-            limit = milliseconds;
-
-        return function () {
-            var self = context || this,
-                args = arguments,
-                now = Date.now();
-
-            if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
-                lastEventTimestamp = now;
-                baseFunction.apply(self, args);
-            }
-        };
-    };
-
-    $.fn.highlightLines = function (configurations) {
-        return this.each(function () {
-            const $pre = $(this);
-            const textContent = $pre.text();
-            const lines = textContent.split(/\r?\n/); // Handles both \n and \r\n line endings
-
-            // Build a map of line numbers to styles
-            const lineStyles = {};
-
-            configurations.forEach(config => {
-                const {color, lines: lineNumbers} = config;
-                lineNumbers.forEach(lineNumber => {
-                    lineStyles[lineNumber] = color;
-                });
-            });
-
-            // Function to escape HTML characters
-            function escapeHtml(text) {
-                return text.replace(/[&<>"'`=\/]/g, function (s) {
-                    return "&#" + s.charCodeAt(0) + ";";
-                });
-            }
-
-            // Process each line
-            const processedLines = lines.map((line, index) => {
-                const lineNumber = index + 1; // Line numbers start at 1
-                const escapedLine = escapeHtml(line);
-                const color = lineStyles[lineNumber];
-
-                if (color) {
-                    // Wrap the line in a span with inline style
-                    return `<span style="background-color: ${color}">${escapedLine}</span>`;
-                } else {
-                    return escapedLine;
-                }
-            });
-
-            // Join the lines back together
-            const newContent = processedLines.join('\n');
-
-            // Set the new content as HTML
-            $pre.html(newContent);
+      configurations.forEach(config => {
+        const { color, lines: lineNumbers } = config;
+        lineNumbers.forEach(lineNumber => {
+          lineStyles[lineNumber] = color;
        });
-    };
-    $.fn.miniTabs = function (tabsConfig, options) {
+      });
+
+      // Function to escape HTML characters
+      function escapeHtml(text) {
+        return text.replace(/[&<>"'`=\/]/g, function(s) {
+          return "&#" + s.charCodeAt(0) + ";";
+        });
+      }
+
+      // Process each line
+      const processedLines = lines.map((line, index) => {
+        const lineNumber = index + 1; // Line numbers start at 1
+        const escapedLine = escapeHtml(line);
+        const color = lineStyles[lineNumber];
+
+        if (color) {
+          // Wrap the line in a span with inline style
+          return `<span style="background-color: ${color}">${escapedLine}</span>`;
+        } else {
+          return escapedLine;
+        }
+      });
+
+      // Join the lines back together
+      const newContent = processedLines.join('\n');
+
+      // Set the new content as HTML
+      $pre.html(newContent);
+    });
+  };
+   $.fn.miniTabs = function(tabsConfig, options) {
        const settings = {
            tabClass: 'minitab',
            tabsContainerClass: 'minitabs',
@@ -108,10 +66,10 @@
            ...(options || {})
        };

-        return this.each(function () {
+        return this.each(function() {
            const $wrapper = $(this);
            const $contents = $wrapper.find('div[id]').hide();
-            const $tabsContainer = $('<div>', {class: settings.tabsContainerClass}).prependTo($wrapper);
+            const $tabsContainer = $('<div>', { class: settings.tabsContainerClass }).prependTo($wrapper);

            // Generate tabs
            Object.entries(tabsConfig).forEach(([tabTitle, contentSelector], index) => {
@@ -126,7 +84,7 @@
            });

            // Tab click event
-            $tabsContainer.on('click', `.${settings.tabClass}`, function (e) {
+            $tabsContainer.on('click', `.${settings.tabClass}`, function(e) {
                e.preventDefault();
                const $tab = $(this);
                const target = $tab.data('target');
@@ -145,7 +103,7 @@
    // Object to store ongoing requests by namespace
    const requests = {};

-    $.abortiveSingularAjax = function (options) {
+    $.abortiveSingularAjax = function(options) {
        const namespace = options.namespace || 'default';

        // Abort the current request in this namespace if it's still ongoing
--- a/changedetectionio/static/js/recheck-proxy.js
+++ b/changedetectionio/static/js/recheck-proxy.js
@@ -1,14 +1,14 @@
 $(function () {
    /* add container before each proxy location to show status */
-    var isActive = false;

-    function setup_html_widget() {
-        var option_li = $('.fetch-backend-proxy li').filter(function () {
-            return $("input", this)[0].value.length > 0;
-        });
-        $(option_li).prepend('<div class="proxy-status"></div>');
-        $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
-    }
+    var option_li = $('.fetch-backend-proxy li').filter(function() {
+        return $("input",this)[0].value.length >0;
+    });
+
+    //var option_li = $('.fetch-backend-proxy li');
+    var isActive = false;
+    $(option_li).prepend('<div class="proxy-status"></div>');
+    $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');

    function set_proxy_check_status(proxy_key, state) {
        // select input by value name
@@ -59,14 +59,8 @@ $(function () {
    }

    $('#check-all-proxies').click(function (e) {
-
        e.preventDefault()
-
-        if (!$('body').hasClass('proxy-check-active')) {
-            setup_html_widget();
-            $('body').addClass('proxy-check-active');
-        }
-
+        $('body').addClass('proxy-check-active');
        $('.proxy-check-details').html('');
        $('.proxy-status').html('<span class="spinner"></span>').fadeIn();
        $('.proxy-timing').html('');
--- a/changedetectionio/static/js/toggle-theme.js
+++ b/changedetectionio/static/js/toggle-theme.js
@@ -49,9 +49,4 @@ $(document).ready(function () {
        $("#overlay").toggleClass('visible');
        heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)';
    });
-
-    setInterval(function () {
-        $('body').toggleClass('spinner-active', $.active > 0);
-    }, 2000);
-
 });
--- a/changedetectionio/static/js/watch-settings.js
+++ b/changedetectionio/static/js/watch-settings.js
@@ -26,28 +26,25 @@ function request_textpreview_update() {
        data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
    });

-    $('body').toggleClass('spinner-active', 1);
-
    $.abortiveSingularAjax({
        type: "POST",
        url: preview_text_edit_filters_url,
        data: data,
        namespace: 'watchEdit'
    }).done(function (data) {
-        console.debug(data['duration'])
        $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']);
+
        $('#filters-and-triggers #text-preview-inner')
            .text(data['after_filter'])
            .highlightLines([
                {
                    'color': '#ee0000',
                    'lines': data['trigger_line_numbers']
-                },
-                {
-                    'color': '#757575',
-                    'lines': data['ignore_line_numbers']
                }
-            ])
+            ]);
+
+
+
    }).fail(function (error) {
        if (error.statusText === 'abort') {
            console.log('Request was aborted due to a new request being fired.');
@@ -76,13 +73,18 @@ $(document).ready(function () {
    $("#text-preview-inner").css('max-height', (vh-300)+"px");
    $("#text-preview-before-inner").css('max-height', (vh-300)+"px");

+    // Realtime preview of 'Filters & Text' setup
+    var debounced_request_textpreview_update = request_textpreview_update.debounce(100);
+
    $("#activate-text-preview").click(function (e) {
        $('body').toggleClass('preview-text-enabled')
        request_textpreview_update();
+
        const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
-        $('#filters-and-triggers textarea')[method]('blur', request_textpreview_update.throttle(1000));
-        $('#filters-and-triggers input')[method]('change', request_textpreview_update.throttle(1000));
-        $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000));
+        $("#text-preview-refresh")[method]('click', debounced_request_textpreview_update);
+        $('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update);
+        $('input:visible')[method]('keyup blur change', debounced_request_textpreview_update);
+        $("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update);
    });
    $('.minitabs-wrapper').miniTabs({
        "Content after filters": "#text-preview-inner",
--- a/changedetectionio/static/styles/scss/parts/_extra_proxies.scss
+++ b/changedetectionio/static/styles/scss/parts/_extra_proxies.scss
@@ -25,19 +25,15 @@ ul#requests-extra_proxies {

 body.proxy-check-active {
  #request {
-    // Padding set by flex layout
-    /*
    .proxy-status {
      width: 2em;
    }
-    */

    .proxy-check-details {
      font-size: 80%;
      color: #555;
      display: block;
-      padding-left: 2em;
-      max-width: 500px;
+      padding-left: 4em;
    }

    .proxy-timing {
--- a/changedetectionio/static/styles/scss/parts/_minitabs.scss
+++ b/changedetectionio/static/styles/scss/parts/_minitabs.scss
@@ -7,16 +7,6 @@
    border-top: none;
  }

-  .minitabs-content {
-    width: 100%;
-    display: flex;
-    > div {
-      flex: 1 1 auto;
-      min-width: 0;
-      overflow: scroll;
-    }
-  }
-
  .minitabs {
    display: flex;
    border-bottom: 1px solid #ccc;
--- a/changedetectionio/static/styles/scss/parts/_preview_text_filter.scss
+++ b/changedetectionio/static/styles/scss/parts/_preview_text_filter.scss
@@ -42,8 +42,9 @@ body.preview-text-enabled {
    color: var(--color-text-input);
    font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */
    font-size: 70%;
-    word-break: break-word;
+    overflow-x: scroll;
    white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */
+    overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */
  }
 }

--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -106,34 +106,10 @@ button.toggle-button {
  padding: 5px;
  display: flex;
  justify-content: space-between;
+  border-bottom: 2px solid var(--color-menu-accent);
  align-items: center;
 }

-#pure-menu-horizontal-spinner {
-  height: 3px;
-  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
-  background-size: 400% 400%;
-  width: 100%;
-  animation: gradient 200s ease infinite;
-}
-
-body.spinner-active {
-  #pure-menu-horizontal-spinner {
-    animation: gradient 1s ease infinite;
-  }
-}
-
-@keyframes gradient {
-	0% {
-		background-position: 0% 50%;
-	}
-	50% {
-		background-position: 100% 50%;
-	}
-	100% {
-		background-position: 0% 50%;
-	}
-}
 .pure-menu-heading {
  color: var(--color-text-menu-heading);
 }
@@ -147,14 +123,8 @@ body.spinner-active {
  }
 }

-
-.tabs ul li a {
-  // .tab-pane-inner will have the #id that the tab button jumps/anchors to
-  scroll-margin-top: 200px;
-}
-
 section.content {
-  padding-top: 100px;
+  padding-top: 5em;
  padding-bottom: 1em;
  flex-direction: column;
  display: flex;
@@ -937,7 +907,6 @@ $form-edge-padding: 20px;
 }

 .tab-pane-inner {
-
  &:not(:target) {
    display: none;
  }
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -119,22 +119,19 @@ ul#requests-extra_proxies {
  #request label[for=proxy] {
    display: inline-block; }

-body.proxy-check-active #request {
-  /*
-    .proxy-status {
-      width: 2em;
-    }
-    */ }
-  body.proxy-check-active #request .proxy-check-details {
-    font-size: 80%;
-    color: #555;
-    display: block;
-    padding-left: 2em;
-    max-width: 500px; }
-  body.proxy-check-active #request .proxy-timing {
-    font-size: 80%;
-    padding-left: 1rem;
-    color: var(--color-link); }
+body.proxy-check-active #request .proxy-status {
+  width: 2em; }
+
+body.proxy-check-active #request .proxy-check-details {
+  font-size: 80%;
+  color: #555;
+  display: block;
+  padding-left: 4em; }
+
+body.proxy-check-active #request .proxy-timing {
+  font-size: 80%;
+  padding-left: 1rem;
+  color: var(--color-link); }

 #recommended-proxy {
  display: grid;
@@ -437,13 +434,6 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark {
    padding: 20px;
    border: 1px solid #ccc;
    border-top: none; }
-  .minitabs-wrapper .minitabs-content {
-    width: 100%;
-    display: flex; }
-    .minitabs-wrapper .minitabs-content > div {
-      flex: 1 1 auto;
-      min-width: 0;
-      overflow: scroll; }
  .minitabs-wrapper .minitabs {
    display: flex;
    border-bottom: 1px solid #ccc; }
@@ -498,9 +488,11 @@ body.preview-text-enabled {
    font-family: "Courier New", Courier, monospace;
    /* Sets the font to a monospace type */
    font-size: 70%;
-    word-break: break-word;
+    overflow-x: scroll;
    white-space: pre-wrap;
-    /* Preserves whitespace and line breaks like <pre> */ }
+    /* Preserves whitespace and line breaks like <pre> */
+    overflow-wrap: break-word;
+    /* Allows long words to break and wrap to the next line */ }

 #activate-text-preview {
  right: 0;
@@ -576,26 +568,9 @@ button.toggle-button {
  padding: 5px;
  display: flex;
  justify-content: space-between;
+  border-bottom: 2px solid var(--color-menu-accent);
  align-items: center; }

-#pure-menu-horizontal-spinner {
-  height: 3px;
-  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
-  background-size: 400% 400%;
-  width: 100%;
-  animation: gradient 200s ease infinite; }
-
-body.spinner-active #pure-menu-horizontal-spinner {
-  animation: gradient 1s ease infinite; }
-
-@keyframes gradient {
-  0% {
-    background-position: 0% 50%; }
-  50% {
-    background-position: 100% 50%; }
-  100% {
-    background-position: 0% 50%; } }
-
 .pure-menu-heading {
  color: var(--color-text-menu-heading); }

@@ -605,12 +580,8 @@ body.spinner-active #pure-menu-horizontal-spinner {
    background-color: var(--color-background-menu-link-hover);
    color: var(--color-text-menu-link-hover); }

-
-.tabs ul li a {
-  scroll-margin-top: 200px; }
-
 section.content {
-  padding-top: 100px;
+  padding-top: 5em;
  padding-bottom: 1em;
  flex-direction: column;
  display: flex;
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -4,7 +4,6 @@ from flask import (
    flash
 )

-from .html_tools import TRANSLATE_WHITESPACE_TABLE
 from . model import App, Watch
 from copy import deepcopy, copy
 from os import path, unlink
@@ -751,17 +750,17 @@ class ChangeDetectionStore:
    def update_5(self):
        # If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings
        # In other words - the watch notification_title and notification_body are not needed if they are the same as the default one
-        current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
-        current_system_title = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
+        current_system_body = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
+        current_system_title = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
        for uuid, watch in self.data['watching'].items():
            try:
                watch_body = watch.get('notification_body', '')
-                if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
+                if watch_body and watch_body.translate(str.maketrans('', '', "\r\n ")) == current_system_body:
                    # Looks the same as the default one, so unset it
                    watch['notification_body'] = None

                watch_title = watch.get('notification_title', '')
-                if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
+                if watch_title and watch_title.translate(str.maketrans('', '', "\r\n ")) == current_system_title:
                    # Looks the same as the default one, so unset it
                    watch['notification_title'] = None
            except Exception as e:
--- a/changedetectionio/templates/base.html
+++ b/changedetectionio/templates/base.html
@@ -35,9 +35,7 @@

  <body class="">
    <div class="header">
-    <div class="pure-menu-fixed" style="width: 100%;">
-      <div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">
-
+      <div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu">
        {% if has_password and not current_user.is_authenticated %}
          <a class="pure-menu-heading" href="https://changedetection.io" rel="noopener">
            <strong>Change</strong>Detection.io</a>
@@ -131,12 +129,7 @@
          </li>
        </ul>
      </div>
-      <div id="pure-menu-horizontal-spinner"></div>
-      </div>
-
    </div>
-
-
    {% if hosted_sticky %}
      <div class="sticky-tab" id="hosted-sticky">
        <a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a>
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -26,6 +26,7 @@
 </script>
 <script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
+<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
 {% if playwright_enabled %}
@@ -329,9 +330,9 @@ nav
                        {{ render_checkbox_field(form.filter_text_added) }}
                        {{ render_checkbox_field(form.filter_text_replaced) }}
                        {{ render_checkbox_field(form.filter_text_removed) }}
-                    <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br>
-                    <span class="pure-form-message-inline">&nbsp;So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
-                    <span class="pure-form-message-inline">&nbsp;When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
+                    <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span>
+                    <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
+                    <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
                </fieldset>
                <fieldset class="pure-control-group">
                    {{ render_checkbox_field(form.check_unique_lines) }}
@@ -370,7 +371,7 @@ nav
 ") }}
                    <span class="pure-form-message-inline">
                        <ul>
-                            <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
+                            <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
@@ -397,9 +398,7 @@ Unavailable") }}
                </fieldset>
                <fieldset>
                    <div class="pure-control-group">
-                        {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
- or
-keyword") }}
+                        {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
                        <span class="pure-form-message-inline">
                    <ul>
                        <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
@@ -425,15 +424,14 @@ keyword") }}
                    </script>
                    <br>
                    {#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
+
                    <div class="minitabs-wrapper">
-                      <div class="minitabs-content">
-                          <div id="text-preview-inner" class="monospace-preview">
-                              <p>Loading...</p>
-                          </div>
-                          <div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
-                              <p>Loading...</p>
-                          </div>
-                      </div>
+                        <div id="text-preview-inner" class="monospace-preview">
+                            <p>Loading...</p>
+                        </div>
+                        <div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
+                            <p>Loading...</p>
+                        </div>
                    </div>
            </div>
          </div>
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@@ -172,7 +172,7 @@ nav
                    <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
                    <span class="pure-form-message-inline">
                        <ul>
-                            <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
+                            <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
                            <li>Note: This is applied globally in addition to the per-watch rules.</li>
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
--- a/changedetectionio/tests/itemprop_test_examples/README.md
+++ b/changedetectionio/tests/itemprop_test_examples/README.md
@@ -1,6 +0,0 @@
-# A list of real world examples!
-
-Always the price should be 666.66 for our tests
-
-see test_restock_itemprop.py::test_special_prop_examples
-
--- a/changedetectionio/tests/itemprop_test_examples/a.txt
+++ b/changedetectionio/tests/itemprop_test_examples/a.txt
@@ -1,25 +0,0 @@
-<div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection"
-     data-testid="price-section"
-     data-optly-product-tile-price-section="true"><span
-        class="PriceRange ProductPrice variant-huge" itemprop="offers"
-        itemscope="" itemtype="http://schema.org/Offer"><div
-        class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span
-        aria-hidden="true" class="Price variant-huge" data-testid="price"
-        itemprop="price"><sup class="sup" data-testid="price-symbol"
-                              itemprop="priceCurrency" content="AUD">$</sup><span
-        class="dollars" data-testid="price-value" itemprop="price"
-        content="155.55">155.55</span><span class="extras"><span class="sup"
-                                                              data-testid="price-sup"></span></span></span></span>
-</div>
-
-<script type="application/ld+json">{
-                                "@type": "Product",
-                                "@context": "https://schema.org",
-                                "name": "test",
-                                "description": "test",
-                                "offers": {
-                                    "@type": "Offer",
-                                    "priceCurrency": "AUD",
-                                    "price": 155.55
-                                },
-                            }</script>
--- a/changedetectionio/tests/proxy_list/test_proxy.py
+++ b/changedetectionio/tests/proxy_list/test_proxy.py
@@ -16,4 +16,4 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    )

    assert b"1 Imported" in res.data
-    wait_for_all_checks(client)
+    time.sleep(3)
--- a/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py
+++ b/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py
@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-import json
 import os
 from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from changedetectionio.tests.util import live_server_setup, wait_for_all_checks


 def set_response():
@@ -19,6 +18,7 @@ def set_response():
        f.write(data)
    time.sleep(1)

+
 def test_socks5(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
    set_response()
@@ -79,24 +79,3 @@ def test_socks5(client, live_server, measure_memory_usage):

    # Should see the proper string
    assert "Awesome, you made it".encode('utf-8') in res.data
-
-    # PROXY CHECKER WIDGET CHECK - this needs more checking
-    uuid = extract_UUID_from_client(client)
-
-    res = client.get(
-        url_for("check_proxies.start_check", uuid=uuid),
-        follow_redirects=True
-    )
-    # It's probably already finished super fast :(
-    #assert b"RUNNING" in res.data
-    
-    wait_for_all_checks(client)
-    res = client.get(
-        url_for("check_proxies.get_recheck_status", uuid=uuid),
-        follow_redirects=True
-    )
-    assert b"OK" in res.data
-
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
-
--- a/changedetectionio/tests/test_add_replace_remove_filter.py
+++ b/changedetectionio/tests/test_add_replace_remove_filter.py
@@ -77,8 +77,6 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory

    # The trigger line is REMOVED,  this should trigger
    set_original(excluding='The golden line')
-
-    # Check in the processor here what's going on, its triggering empty-reply and no change.
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
@@ -153,6 +151,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa

    # A line thats not the trigger should not trigger anything
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
    assert b'1 watches queued for rechecking.' in res.data

    wait_for_all_checks(client)
@@ -174,5 +173,6 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
        assert b'-Oh yes please-' in response
        assert '网站监测 内容更新了'.encode('utf-8') in response

+
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_element_removal.py
+++ b/changedetectionio/tests/test_element_removal.py
@@ -5,7 +5,7 @@ import time
 from flask import url_for

 from ..html_tools import *
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup


 def test_setup(live_server):
@@ -119,10 +119,12 @@ across multiple lines


 def test_element_removal_full(client, live_server, measure_memory_usage):
-    #live_server_setup(live_server)
+    sleep_time_for_fetch_thread = 3

    set_original_response()

+    # Give the endpoint time to spin up
+    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for("test_endpoint", _external=True)
@@ -130,8 +132,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
        url_for("import_page"), data={"urls": test_url}, follow_redirects=True
    )
    assert b"1 Imported" in res.data
-    wait_for_all_checks(client)
-
+    time.sleep(1)
    # Goto the edit page, add the filter data
    # Not sure why \r needs to be added - absent of the #changetext this is not necessary
    subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"
@@ -147,7 +148,6 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
        follow_redirects=True,
    )
    assert b"Updated watch." in res.data
-    wait_for_all_checks(client)

    # Check it saved
    res = client.get(
@@ -156,10 +156,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
    assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data

    # Trigger a check
-    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    assert b'1 watches queued for rechecking.' in res.data
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)

-    wait_for_all_checks(client)
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)

    # so that we set the state to 'unviewed' after all the edits
    client.get(url_for("diff_history_page", uuid="first"))
@@ -168,11 +168,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
    set_modified_response()

    # Trigger a check
-    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    assert b'1 watches queued for rechecking.' in res.data
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
-    wait_for_all_checks(client)
+    time.sleep(sleep_time_for_fetch_thread)

    # There should not be an unviewed change, as changes should be removed
    res = client.get(url_for("index"))
--- a/changedetectionio/tests/test_extract_regex.py
+++ b/changedetectionio/tests/test_extract_regex.py
@@ -71,7 +71,7 @@ def test_setup(client, live_server, measure_memory_usage):
    live_server_setup(live_server)

 def test_check_filter_multiline(client, live_server, measure_memory_usage):
-   # live_server_setup(live_server)
+    #live_server_setup(live_server)
    set_multiline_response()

    # Add our URL to the import page
--- a/changedetectionio/tests/test_ignore_regex_text.py
+++ b/changedetectionio/tests/test_ignore_regex_text.py
@@ -33,17 +33,13 @@ def test_strip_regex_text_func():

    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

-    assert "but 1 lines" in stripped_content
-    assert "igNORe-cAse text" not in stripped_content
-    assert "but 1234 lines" not in stripped_content
-    assert "really" not in stripped_content
-    assert "not this" not in stripped_content
+    assert b"but 1 lines" in stripped_content
+    assert b"igNORe-cAse text" not in stripped_content
+    assert b"but 1234 lines" not in stripped_content
+    assert b"really" not in stripped_content
+    assert b"not this" not in stripped_content

    # Check line number reporting
    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers")
    assert stripped_content == [2, 5, 6, 7, 8, 10]

-    # Check that linefeeds are preserved when there are is no matching ignores
-    content = "some text\n\nand other text\n"
-    stripped_content = html_tools.strip_ignore_text(content, ignore_lines)
-    assert content == stripped_content
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@@ -22,15 +22,10 @@ def test_strip_text_func():
    ignore_lines = ["sometimes"]

    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
-    assert "sometimes" not in stripped_content
-    assert "Some content" in stripped_content

-    # Check that line feeds dont get chewed up when something is found
-    test_content = "Some initial text\n\nWhich is across multiple lines\n\nZZZZz\n\n\nSo let's see what happens."
-    ignore = ['something irrelevent but just to check', 'XXXXX', 'YYYYY', 'ZZZZZ']
+    assert b"sometimes" not in stripped_content
+    assert b"Some content" in stripped_content

-    stripped_content = html_tools.strip_ignore_text(test_content, ignore)
-    assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens."

 def set_original_ignore_response():
    test_return_data = """<html>
@@ -146,6 +141,8 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa



+
+
    # Just to be sure.. set a regular modified change..
    set_modified_original_ignore_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -156,17 +153,17 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa

    res = client.get(url_for("preview_page", uuid="first"))

-    # SHOULD BE be in the preview, it was added in set_modified_original_ignore_response()
-    # and we have "new ignore stuff" in ignore_text
-    # it is only ignored, it is not removed (it will be highlighted too)
-    assert b'new ignore stuff' in res.data
+    # Should no longer be in the preview
+    assert b'new ignore stuff' not in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

-# When adding some ignore text, it should not trigger a change, even if something else on that line changes
 def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
-    #live_server_setup(live_server)
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+
    ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
    set_original_ignore_response()

@@ -175,7 +172,6 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
        url_for("settings_page"),
        data={
            "requests-time_between_check-minutes": 180,
-            "application-ignore_whitespace": "y",
            "application-global_ignore_text": ignore_text,
            'application-fetch_backend': "html_requests"
        },
@@ -196,7 +192,9 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
    # Give the thread time to pick it up
    wait_for_all_checks(client)

-    #Adding some ignore text should not trigger a change
+
+    # Goto the edit page of the item, add our ignore text
+    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"},
@@ -212,15 +210,20 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
+    # Give the thread time to pick it up
    wait_for_all_checks(client)
-    # It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change
+
+    # so that we are sure everything is viewed and in a known 'nothing changed' state
+    res = client.get(url_for("diff_history_page", uuid="first"))
+
+    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data
-#####

-    # Make a change which includes the ignore text, it should be ignored and no 'change' triggered
-    # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list
+
+    #  Make a change which includes the ignore text
    set_modified_ignore_response()

    # Trigger a check
@@ -230,7 +233,6 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
-
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data

--- a/changedetectionio/tests/test_live_preview.py
+++ b/changedetectionio/tests/test_live_preview.py
@@ -1,78 +0,0 @@
-#!/usr/bin/env python3
-
-from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
-
-
-def set_response():
-
-    data = f"""<html>
-       <body>Awesome, you made it<br>
-yeah the socks request worked<br>
-something to ignore<br>
-something to trigger<br>
-     </body>
-     </html>
-    """
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write(data)
-
-def test_content_filter_live_preview(client, live_server, measure_memory_usage):
-    live_server_setup(live_server)
-    set_response()
-
-    test_url = url_for('test_endpoint', _external=True)
-
-    res = client.post(
-        url_for("form_quick_watch_add"),
-        data={"url": test_url, "tags": ''},
-        follow_redirects=True
-    )
-    uuid = extract_UUID_from_client(client)
-    res = client.post(
-        url_for("edit_page", uuid=uuid),
-        data={
-            "include_filters": "",
-            "fetch_backend": 'html_requests',
-            "ignore_text": "something to ignore",
-            "trigger_text": "something to trigger",
-            "url": test_url,
-        },
-        follow_redirects=True
-    )
-    assert b"Updated watch." in res.data
-    wait_for_all_checks(client)
-
-    # The endpoint is a POST and accepts the form values to override the watch preview
-    import json
-
-    # DEFAULT OUTPUT WITHOUT ANYTHING UPDATED/CHANGED - SHOULD SEE THE WATCH DEFAULTS
-    res = client.post(
-        url_for("watch_get_preview_rendered", uuid=uuid)
-    )
-    default_return = json.loads(res.data.decode('utf-8'))
-    assert default_return.get('after_filter')
-    assert default_return.get('before_filter')
-    assert default_return.get('ignore_line_numbers') == [3] # "something to ignore" line 3
-    assert default_return.get('trigger_line_numbers') == [4] # "something to trigger" line 4
-
-    # SEND AN UPDATE AND WE SHOULD SEE THE OUTPUT CHANGE SO WE KNOW TO HIGHLIGHT NEW STUFF
-    res = client.post(
-        url_for("watch_get_preview_rendered", uuid=uuid),
-        data={
-            "include_filters": "",
-            "fetch_backend": 'html_requests',
-            "ignore_text": "sOckS", # Also be sure case insensitive works
-            "trigger_text": "AweSOme",
-            "url": test_url,
-        },
-    )
-    reply = json.loads(res.data.decode('utf-8'))
-    assert reply.get('after_filter')
-    assert reply.get('before_filter')
-    assert reply.get('ignore_line_numbers') == [2]  # Ignored - "socks" on line 2
-    assert reply.get('trigger_line_numbers') == [1]  # Triggers "Awesome" in line 1
-
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_preview_endpoints.py
+++ b/changedetectionio/tests/test_preview_endpoints.py
@@ -1,72 +0,0 @@
-#!/usr/bin/env python3
-
-import time
-from flask import url_for
-from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
-
-
-# `subtractive_selectors` should still work in `source:` type requests
-def test_fetch_pdf(client, live_server, measure_memory_usage):
-    import shutil
-    shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")
-
-    live_server_setup(live_server)
-    test_url = url_for('test_pdf_endpoint', _external=True)
-    # Add our URL to the import page
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
-
-    wait_for_all_checks(client)
-
-    res = client.get(
-        url_for("preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    # PDF header should not be there (it was converted to text)
-    assert b'PDF' not in res.data[:10]
-    assert b'hello world' in res.data
-
-    # So we know if the file changes in other ways
-    import hashlib
-    original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
-    # We should have one
-    assert len(original_md5) > 0
-    # And it's going to be in the document
-    assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data
-
-    shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf")
-    changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
-    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    assert b'1 watches queued for rechecking.' in res.data
-
-    wait_for_all_checks(client)
-
-    # Now something should be ready, indicated by having a 'unviewed' class
-    res = client.get(url_for("index"))
-    assert b'unviewed' in res.data
-
-    # The original checksum should be not be here anymore (cdio adds it to the bottom of the text)
-
-    res = client.get(
-        url_for("preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    assert original_md5.encode('utf-8') not in res.data
-    assert changed_md5.encode('utf-8') in res.data
-
-    res = client.get(
-        url_for("diff_history_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    assert original_md5.encode('utf-8') in res.data
-    assert changed_md5.encode('utf-8') in res.data
-
-    assert b'here is a change' in res.data
--- a/changedetectionio/tests/test_restock_itemprop.py
+++ b/changedetectionio/tests/test_restock_itemprop.py
@@ -3,7 +3,7 @@ import os
 import time

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
+from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
 from ..notification import default_notification_format

 instock_props = [
@@ -413,31 +413,3 @@ def test_data_sanity(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"))
    assert test_url2.encode('utf-8') in res.data
-
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
-
-# All examples should give a prive of 666.66
-def test_special_prop_examples(client, live_server):
-    import glob
-    #live_server_setup(live_server)
-
-    test_url = url_for('test_endpoint', _external=True)
-    check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt")
-    files = glob.glob(check_path)
-    assert files
-    for test_example_filename in files:
-        with open(test_example_filename, 'r') as example_f:
-            with open("test-datastore/endpoint-content.txt", "w") as test_f:
-                test_f.write(f"<html><body>{example_f.read()}</body></html>")
-
-            # Now fetch it and check the price worked
-            client.post(
-                url_for("form_quick_watch_add"),
-                data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
-                follow_redirects=True
-            )
-            wait_for_all_checks(client)
-            res = client.get(url_for("index"))
-            assert b'ception' not in res.data
-            assert b'155.55' in res.data
--- a/changedetectionio/tests/unit/test_watch_model.py
+++ b/changedetectionio/tests/unit/test_watch_model.py
@@ -18,13 +18,12 @@ class TestDiffBuilder(unittest.TestCase):

        watch['last_viewed'] = 110

-        # Contents from the browser are always returned from the browser/requests/etc as str, str is basically UTF-16 in python
-        watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents="hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents="hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents=b"hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents=b"hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents=b"hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents=b"hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents=b"hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents=b"hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))

        p = watch.get_next_snapshot_key_to_last_viewed
        assert p == "112", "Correct last-viewed timestamp was detected"
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -260,6 +260,9 @@ class update_worker(threading.Thread):
                    try:
                        # Processor is what we are using for detecting the "Change"
                        processor = watch.get('processor', 'text_json_diff')
+                        # Abort processing when the content was the same as the last fetch
+                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
+

                        # Init a new 'difference_detection_processor', first look in processors
                        processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -275,13 +278,16 @@ class update_worker(threading.Thread):

                        update_handler.call_browser()

-                        changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
+                        changed_detected, update_obj, contents = update_handler.run_changedetection(
+                            watch=watch,
+                            skip_when_checksum_same=skip_when_same_checksum,
+                        )

                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
                        # We then convert/.decode('utf-8') for the notification etc
-#                        if not isinstance(contents, (bytes, bytearray)):
-#                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
+                        if not isinstance(contents, (bytes, bytearray)):
+                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
                    except PermissionError as e:
                        logger.critical(f"File permission error updating file, watch: {uuid}")
                        logger.critical(str(e))
@@ -332,8 +338,7 @@ class update_worker(threading.Thread):
                        elif e.status_code == 500:
                            err_text = "Error - 500 (Internal server error) received from the web site"
                        else:
-                            extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
-                            err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"
+                            err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))

                        if e.screenshot:
                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)