Merge branch 'master' into extra-filters

Filters should apply at the end of the chain
2026-05-03 08:10:37 +00:00 · 2024-09-17 18:32:14 +02:00 · 2024-09-11 11:32:12 +02:00 · 2024-09-10 14:28:58 +02:00 · 2024-09-10 12:32:07 +02:00 · 2024-09-09 22:19:50 +02:00
46 changed files with 416 additions and 1000 deletions
@@ -1,6 +1,5 @@
 # include the decorator
 from apprise.decorators import notify
-from loguru import logger

@notify(on="delete")
@notify(on="deletes")
@@ -65,12 +64,10 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
            auth = (URLBase.unquote(results.get('user')))

    # Try to auto-guess if it's JSON
-    h = 'application/json; charset=utf-8'
    try:
        json.loads(body)
-        headers['Content-Type'] = h
+        headers['Content-Type'] = 'application/json; charset=utf-8'
    except ValueError as e:
-        logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}")
        pass

    r(results.get('url'),
@@ -89,13 +89,11 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                    {{ render_field(form.subtractive_selectors, rows=5, placeholder="header
 footer
 nav
-.stockticker
-//*[contains(text(), 'Advertisement')]") }}
+.stockticker") }}
                    <span class="pure-form-message-inline">
                        <ul>
-                          <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
-                          <li> Don't paste HTML here, use only CSS and XPath selectors </li>
-                          <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
+                          <li> Remove HTML element(s) by CSS selector before text conversion. </li>
+                          <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
                        </ul>
                      </span>
                </fieldset>
@@ -4,9 +4,7 @@ from loguru import logger
 from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
 import os

-# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
-visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
-
+visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary'

 # available_fetchers() will scan this implementation looking for anything starting with html_
 # this information is used in the form selections
@@ -154,14 +154,10 @@ function isItemInStock() {
        }

        elementText = "";
-        try {
-            if (element.tagName.toLowerCase() === "input") {
-                elementText = element.value.toLowerCase().trim();
-            } else {
-                elementText = getElementBaseText(element);
-            }
-        } catch (e) {
-            console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
+        if (element.tagName.toLowerCase() === "input") {
+            elementText = element.value.toLowerCase().trim();
+        } else {
+            elementText = getElementBaseText(element);
        }

        if (elementText.length) {
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3

 import datetime
+import importlib

 import flask_login
 import locale
@@ -11,7 +12,9 @@ import threading
 import time
 import timeago

+from .content_fetchers.exceptions import ReplyWithContentButNoText
 from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
+from .processors.text_json_diff.processor import FilterNotFoundInResponse
 from .safe_jinja import render as jinja_render
 from changedetectionio.strtobool import strtobool
 from copy import deepcopy
@@ -1155,6 +1158,8 @@ def changedetection_app(config=None, datastore_o=None):
    @login_optionally_required
    def preview_page(uuid):
        content = []
+        ignored_line_numbers = []
+        trigger_line_numbers = []
        versions = []
        timestamp = None

@@ -1171,10 +1176,11 @@ def changedetection_app(config=None, datastore_o=None):
        system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
        extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]

+
        is_html_webdriver = False
        if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
            is_html_webdriver = True
-        triggered_line_numbers = []
+
        if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
            flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
        else:
@@ -1187,12 +1193,31 @@ def changedetection_app(config=None, datastore_o=None):

            try:
                versions = list(watch.history.keys())
-                content = watch.get_history_snapshot(timestamp)
+                tmp = watch.get_history_snapshot(timestamp).splitlines()

-                triggered_line_numbers = html_tools.strip_ignore_text(content=content,
-                                                                      wordlist=watch['trigger_text'],
-                                                                      mode='line numbers'
-                                                                      )
+                # Get what needs to be highlighted
+                ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
+
+                # splitlines() dropped the line endings, so the text is re-joined with "\n" before being parsed again below - TODO: tidy this up
+                ignored_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
+                                                                    wordlist=ignore_rules,
+                                                                    mode='line numbers'
+                                                                    )
+
+                trigger_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
+                                                                    wordlist=watch['trigger_text'],
+                                                                    mode='line numbers'
+                                                                    )
+                # Prepare the classes and lines used in the template
+                i=0
+                for l in tmp:
+                    classes=[]
+                    i+=1
+                    if i in ignored_line_numbers:
+                        classes.append('ignored')
+                    if i in trigger_line_numbers:
+                        classes.append('triggered')
+                    content.append({'line': l, 'classes': ' '.join(classes)})

            except Exception as e:
                content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
@@ -1203,7 +1228,8 @@ def changedetection_app(config=None, datastore_o=None):
                                 history_n=watch.history_n,
                                 extra_stylesheets=extra_stylesheets,
                                 extra_title=f" - Diff - {watch.label} @ {timestamp}",
-                                 triggered_line_numbers=triggered_line_numbers,
+                                 ignored_line_numbers=ignored_line_numbers,
+                                 triggered_line_numbers=trigger_line_numbers,
                                 current_diff_url=watch['url'],
                                 screenshot=watch.get_screenshot(),
                                 watch=watch,
@@ -1374,13 +1400,55 @@ def changedetection_app(config=None, datastore_o=None):
        # Return a 500 error
        abort(500)

-    # Ajax callback
    @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
    @login_optionally_required
    def watch_get_preview_rendered(uuid):
        '''For when viewing the "preview" of the rendered text from inside of Edit'''
-        from .processors.text_json_diff import prepare_filter_prevew
-        return prepare_filter_prevew(watch_uuid=uuid, datastore=datastore)
+        now = time.time()
+        import brotli
+        from . import forms
+
+        text_after_filter = ''
+        tmp_watch = deepcopy(datastore.data['watching'].get(uuid))
+
+        if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
+            # Splice in the temporary stuff from the form
+            form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
+                                                       data=request.form
+                                                       )
+            # Only update vars that came in via the AJAX post
+            p = {k: v for k, v in form.data.items() if k in request.form.keys()}
+            tmp_watch.update(p)
+
+            latest_filename = next(reversed(tmp_watch.history))
+            html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
+            with open(html_fname, 'rb') as f:
+                decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
+
+                # Just like a normal change detection, except we provide a fake "watch" object and don't call .call_browser()
+                processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
+                update_handler = processor_module.perform_site_check(datastore=datastore,
+                                                                     watch_uuid=uuid # probably not needed anymore anyway?
+                                                                     )
+                # Use the last loaded HTML as the input
+                update_handler.fetcher.content = decompressed_data
+                try:
+                    changed_detected, update_obj, contents, text_after_filter = update_handler.run_changedetection(
+                        watch=tmp_watch,
+                        skip_when_checksum_same=False,
+                    )
+                except FilterNotFoundInResponse as e:
+                    text_after_filter = f"Filter not found in HTML: {str(e)}"
+                except ReplyWithContentButNoText as e:
+                    text_after_filter = f"Filter found but no text (empty result)"
+                except Exception as e:
+                    text_after_filter = f"Error: {str(e)}"
+
+            if not text_after_filter.strip():
+                text_after_filter = 'Empty content'
+
+        logger.trace(f"Parsed in {time.time()-now:.3f}s")
+        return text_after_filter.strip()


    @app.route("/form/add/quickwatch", methods=['POST'])
@@ -1,6 +1,5 @@
 import os
 import re
-from loguru import logger

 from changedetectionio.strtobool import strtobool

@@ -470,13 +469,13 @@ class processor_text_json_diff_form(commonSettingsForm):

    include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')

-    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
+    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])

    extract_text = StringListField('Extract text', [ValidateListRegex()])

    title = StringField('Title', default='')

-    ignore_text = StringListField('Remove lines containing', [ValidateListRegex()])
+    ignore_text = StringListField('Ignore text', [ValidateListRegex()])
    headers = StringDictKeyValue('Request headers')
    body = TextAreaField('Request body', [validators.Optional()])
    method = SelectField('Request method', choices=valid_method, default=default_method)
@@ -526,16 +525,9 @@ class processor_text_json_diff_form(commonSettingsForm):
        try:
            from changedetectionio.safe_jinja import render as jinja_render
            jinja_render(template_str=self.url.data)
-        except ModuleNotFoundError as e:
-            # incase jinja2_time or others is missing
-            logger.error(e)
-            self.url.errors.append(e)
-            result = False
        except Exception as e:
-            logger.error(e)
            self.url.errors.append('Invalid template syntax')
            result = False
-
        return result

 class SingleExtraProxy(Form):
@@ -586,7 +578,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
    empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False)
    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
-    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
+    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
    ignore_whitespace = BooleanField('Ignore whitespace')
    password = SaltyPasswordField()
    pager_size = IntegerField('Pager size',
@@ -1,5 +1,4 @@
 from typing import List
-from lxml import etree
 import json
 import re

@@ -58,26 +57,11 @@ def subtractive_css_selector(css_selector, html_content):
        item.decompose()
    return str(soup)

-def subtractive_xpath_selector(xpath_selector, html_content): 
-    html_tree = etree.HTML(html_content)
-    elements_to_remove = html_tree.xpath(xpath_selector)
-
-    for element in elements_to_remove:
-        element.getparent().remove(element)
-
-    modified_html = etree.tostring(html_tree, method="html").decode("utf-8")
-    return modified_html

 def element_removal(selectors: List[str], html_content):
-    """Removes elements that match a list of CSS or xPath selectors."""
-    modified_html = html_content
-    for selector in selectors:
-        if selector.startswith(('xpath:', 'xpath1:', '//')):
-            xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
-            modified_html = subtractive_xpath_selector(xpath_selector, modified_html)
-        else:
-            modified_html = subtractive_css_selector(selector, modified_html)
-    return modified_html
+    """Joins individual filters into one css filter."""
+    selector = ",".join(selectors)
+    return subtractive_css_selector(selector, html_content)

 def elementpath_tostring(obj):
    """
@@ -36,9 +36,8 @@ class model(watch_base):
    jitter_seconds = 0

    def __init__(self, *arg, **kw):
-        self.__datastore_path = kw.get('datastore_path')
-        if kw.get('datastore_path'):
-            del kw['datastore_path']
+        self.__datastore_path = kw['datastore_path']
+        del kw['datastore_path']
        super(model, self).__init__(*arg, **kw)
        if kw.get('default'):
            self.update(kw['default'])
@@ -172,10 +171,6 @@ class model(watch_base):
        """
        tmp_history = {}

-        # In the case we are only using the watch for processing without history
-        if not self.watch_data_dir:
-            return []
-
        # Read the history file as a dict
        fname = os.path.join(self.watch_data_dir, "history.txt")
        if os.path.isfile(fname):
@@ -401,8 +396,8 @@ class model(watch_base):
    @property
    def watch_data_dir(self):
        # The base dir of the watch data
-        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
-
+        return os.path.join(self.__datastore_path, self['uuid'])
+    
    def get_error_text(self):
        """Return the text saved from a previous request that resulted in a non-200 error"""
        fname = os.path.join(self.watch_data_dir, "last-error.txt")
@@ -18,7 +18,6 @@ class watch_base(dict):
            'check_count': 0,
            'check_unique_lines': False,  # On change-detected, compare against all history if its something new
            'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
-            'content-type': None,
            'date_created': None,
            'extract_text': [],  # Extract text by regex after filters
            'extract_title_as_title': False,
@@ -60,9 +59,8 @@ class watch_base(dict):
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
            'time_between_check_use_default': True,
            'title': None,
-            'track_ldjson_price_data': None,
            'trim_text_whitespace': False,
-            'remove_duplicate_lines': False,
+            'track_ldjson_price_data': None,
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
            'url': '',
            'uuid': str(uuid.uuid4()),
@@ -1,14 +1,16 @@
 from abc import abstractmethod
+
 from changedetectionio.content_fetchers.base import Fetcher
 from changedetectionio.strtobool import strtobool
+
 from copy import deepcopy
 from loguru import logger
 import hashlib
-import importlib
-import inspect
 import os
-import pkgutil
 import re
+import importlib
+import pkgutil
+import inspect

 class difference_detection_processor():

@@ -155,12 +157,12 @@ class difference_detection_processor():
        # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
-    def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
        changed_detected = False
-        return changed_detected, update_obj, ''.encode('utf-8')
+        return changed_detected, update_obj, ''.encode('utf-8'), b''


 def find_sub_packages(package_name):
@@ -143,6 +143,8 @@ class perform_site_check(difference_detection_processor):
    def run_changedetection(self, watch, skip_when_checksum_same=True):
        import hashlib

+        from concurrent.futures import ProcessPoolExecutor
+        from functools import partial
        if not watch:
            raise Exception("Watch no longer exists.")

@@ -156,20 +158,6 @@ class perform_site_check(difference_detection_processor):
        update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
        update_obj["last_check_status"] = self.fetcher.get_last_status_code()

-        # Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
-        # Otherwise it will assume "in stock" because nothing suggesting the opposite was found
-        from ...html_tools import html_to_text
-        text = html_to_text(self.fetcher.content)
-        logger.debug(f"Length of text after conversion: {len(text)}")
-        if not len(text):
-            from ...content_fetchers.exceptions import ReplyWithContentButNoText
-            raise ReplyWithContentButNoText(url=watch.link,
-                                            status_code=self.fetcher.get_last_status_code(),
-                                            screenshot=self.fetcher.screenshot,
-                                            html_content=self.fetcher.content,
-                                            xpath_data=self.fetcher.xpath_data
-                                            )
-
        # Which restock settings to compare against?
        restock_settings = watch.get('restock_settings', {})

@@ -184,7 +172,11 @@ class perform_site_check(difference_detection_processor):

        itemprop_availability = {}
        try:
-            itemprop_availability = get_itemprop_availability(self.fetcher.content)
+            with ProcessPoolExecutor() as executor:
+                # Use functools.partial to create a callable with arguments
+                # anything using bs4/lxml etc is quite "leaky"
+                future = executor.submit(partial(get_itemprop_availability, self.fetcher.content))
+                itemprop_availability = future.result()
        except MoreThanOnePriceFound as e:
            # Add the real data
            raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
@@ -229,21 +221,12 @@ class perform_site_check(difference_detection_processor):
                xpath_data=self.fetcher.xpath_data
                )

-        logger.debug(f"self.fetcher.instock_data is - '{self.fetcher.instock_data}' and itemprop_availability.get('availability') is {itemprop_availability.get('availability')}")
        # Nothing automatic in microdata found, revert to scraping the page
        if self.fetcher.instock_data and itemprop_availability.get('availability') is None:
            # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
            # Careful! this does not really come from chrome/js when the watch is set to plaintext
            update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
-            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.")
-
-        # Very often websites will lie about the 'availability' in the metadata, so if the scraped version says its NOT in stock, use that.
-        if self.fetcher.instock_data and self.fetcher.instock_data != 'Possibly in stock':
-            if update_obj['restock'].get('in_stock'):
-                logger.warning(
-                    f"Lie detected in the availability machine data!! when scraping said its not in stock!! itemprop was '{itemprop_availability}' and scraped from browser was '{self.fetcher.instock_data}' update obj was {update_obj['restock']} ")
-                logger.warning(f"Setting instock to FALSE, scraper found '{self.fetcher.instock_data}' in the body but metadata reported not-in-stock")
-                update_obj['restock']["in_stock"] = False
+            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")

        # What we store in the snapshot
        price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
@@ -307,4 +290,4 @@ class perform_site_check(difference_detection_processor):
        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5

-        return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
+        return changed_detected, update_obj, snapshot_content.encode('utf-8').strip(), b''
@@ -1,107 +0,0 @@
-
-from loguru import logger
-
-
-
-def _task(watch, update_handler):
-    from changedetectionio.content_fetchers.exceptions import ReplyWithContentButNoText
-    from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
-
-    text_after_filter = ''
-
-    try:
-        # The slow process (we run 2 of these in parallel)
-        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
-            watch=watch,
-            skip_when_checksum_same=False,
-        )
-    except FilterNotFoundInResponse as e:
-        text_after_filter = f"Filter not found in HTML: {str(e)}"
-    except ReplyWithContentButNoText as e:
-        text_after_filter = f"Filter found but no text (empty result)"
-    except Exception as e:
-        text_after_filter = f"Error: {str(e)}"
-
-    if not text_after_filter.strip():
-        text_after_filter = 'Empty content'
-
-    # because run_changedetection always returns bytes due to saving the snapshots etc
-    text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter
-
-    return text_after_filter
-
-
-def prepare_filter_prevew(datastore, watch_uuid):
-    '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
-    from changedetectionio import forms, html_tools
-    from changedetectionio.model.Watch import model as watch_model
-    from concurrent.futures import ProcessPoolExecutor
-    from copy import deepcopy
-    from flask import request, jsonify
-    import brotli
-    import importlib
-    import os
-    import time
-    now = time.time()
-
-    text_after_filter = ''
-    text_before_filter = ''
-    tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
-
-    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
-        # Splice in the temporary stuff from the form
-        form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
-                                                   data=request.form
-                                                   )
-
-        # Only update vars that came in via the AJAX post
-        p = {k: v for k, v in form.data.items() if k in request.form.keys()}
-        tmp_watch.update(p)
-        blank_watch_no_filters = watch_model()
-        blank_watch_no_filters['url'] = tmp_watch.get('url')
-
-        latest_filename = next(reversed(tmp_watch.history))
-        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
-        with open(html_fname, 'rb') as f:
-            decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
-
-            # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
-            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
-            update_handler = processor_module.perform_site_check(datastore=datastore,
-                                                                 watch_uuid=tmp_watch.get('uuid')  # probably not needed anymore anyway?
-                                                                 )
-            # Use the last loaded HTML as the input
-            update_handler.datastore = datastore
-            update_handler.fetcher.content = decompressed_data
-            update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
-
-            # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
-            # Do this as a parallel process because it could take some time
-            with ProcessPoolExecutor(max_workers=2) as executor:
-                future1 = executor.submit(_task, tmp_watch, update_handler)
-                future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
-
-                text_after_filter = future1.result()
-                text_before_filter = future2.result()
-
-    trigger_line_numbers = []
-    try:
-
-        trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
-                                                            wordlist=tmp_watch['trigger_text'],
-                                                            mode='line numbers'
-                                                            )
-    except Exception as e:
-        text_before_filter = f"Error: {str(e)}"
-
-    logger.trace(f"Parsed in {time.time() - now:.3f}s")
-
-    return jsonify(
-        {
-            'after_filter': text_after_filter,
-            'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
-            'duration': time.time() - now,
-            'trigger_line_numbers': trigger_line_numbers,
-        }
-    )
-
@@ -36,6 +36,8 @@ class PDFToHTMLToolNotFound(ValueError):
 class perform_site_check(difference_detection_processor):

    def run_changedetection(self, watch, skip_when_checksum_same=True):
+        from concurrent.futures import ProcessPoolExecutor
+        from functools import partial

        changed_detected = False
        html_content = ""
@@ -172,20 +174,30 @@ class perform_site_check(difference_detection_processor):
                    for filter_rule in include_filters_rule:
                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
-                            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
+                            with ProcessPoolExecutor() as executor:
+                                # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+                                future = executor.submit(partial(html_tools.xpath_filter, xpath_filter=filter_rule.replace('xpath:', ''),
                                                                    html_content=self.fetcher.content,
                                                                    append_pretty_line_formatting=not watch.is_source_type_url,
-                                                                    is_rss=is_rss)
+                                                                    is_rss=is_rss))
+                                html_content += future.result()

                        elif filter_rule.startswith('xpath1:'):
-                            html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
-                                                                     html_content=self.fetcher.content,
-                                                                     append_pretty_line_formatting=not watch.is_source_type_url,
-                                                                     is_rss=is_rss)
+                            with ProcessPoolExecutor() as executor:
+                                # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+                                future = executor.submit(partial(html_tools.xpath1_filter, xpath_filter=filter_rule.replace('xpath1:', ''),
+                                                                    html_content=self.fetcher.content,
+                                                                    append_pretty_line_formatting=not watch.is_source_type_url,
+                                                                    is_rss=is_rss))
+                                html_content += future.result()
                        else:
-                            html_content += html_tools.include_filters(include_filters=filter_rule,
+                            with ProcessPoolExecutor() as executor:
+                                # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+                                # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+                                future = executor.submit(partial(html_tools.include_filters, include_filters=filter_rule,
                                                                       html_content=self.fetcher.content,
-                                                                       append_pretty_line_formatting=not watch.is_source_type_url)
+                                                                       append_pretty_line_formatting=not watch.is_source_type_url))
+                                html_content += future.result()

                    if not html_content.strip():
                        raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
@@ -198,24 +210,15 @@ class perform_site_check(difference_detection_processor):
                else:
                    # extract text
                    do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
-                    stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
-                                                                      render_anchor_tag_content=do_anchor,
-                                                                      is_rss=is_rss)  # 1874 activate the <title workaround hack
-
-        if watch.get('trim_text_whitespace'):
-            stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
-
-        if watch.get('remove_duplicate_lines'):
-            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
-
-        if watch.get('sort_text_alphabetically'):
-            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
-            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
-            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
-            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
+                    with ProcessPoolExecutor() as executor:
+                        # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+                        # Convert the HTML to plain text via html_tools.html_to_text in the worker process
+                        future = executor.submit(partial(html_tools.html_to_text, html_content=html_content,
+                            render_anchor_tag_content=do_anchor,
+                            is_rss=is_rss)) #1874 activate the <title workaround hack
+                        stripped_text_from_html = future.result()

        # Re #340 - return the content before the 'ignore text' was applied
-        # Also used to calculate/show what was removed
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

        # @todo whitespace coming from missing rtrim()?
@@ -240,8 +243,8 @@ class perform_site_check(difference_detection_processor):
            if not rendered_diff and stripped_text_from_html:
                # We had some content, but no differences were found
                # Store our new file as the MD5 so it will trigger in the future
-                c = hashlib.md5(stripped_text_from_html.encode('utf-8').translate(None, b'\r\n\t ')).hexdigest()
-                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
+                c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest()
+                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8'), stripped_text_from_html.encode('utf-8')
            else:
                stripped_text_from_html = rendered_diff

@@ -295,7 +298,7 @@ class perform_site_check(difference_detection_processor):
                        for match in res:
                            regex_matched_output += [match] + [b'\n']

-            ##########################################################
+            # Now we will only show what the regex matched
            stripped_text_from_html = b''
            text_content_before_ignored_filter = b''
            if regex_matched_output:
@@ -304,6 +307,18 @@ class perform_site_check(difference_detection_processor):
                text_content_before_ignored_filter = stripped_text_from_html


+        if watch.get('sort_text_alphabetically') and stripped_text_from_html:
+            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
+            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
+            stripped_text_from_html = stripped_text_from_html.replace(b'\n\n', b'\n')
+            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.decode('utf-8').splitlines(), key=lambda x: x.lower())).encode('utf-8')
+
+        # Strip leading/trailing whitespace from every line
+        if watch.get('trim_text_whitespace') and stripped_text_from_html:
+            stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.decode('utf-8').splitlines()).encode('utf-8')
+        # Drop repeated lines, keeping the first occurrence of each
+        if watch.get('remove_duplicate_lines') and stripped_text_from_html:
+            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.decode('utf-8').splitlines())).encode('utf-8')

        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
@@ -364,5 +379,4 @@ class perform_site_check(difference_detection_processor):
        if not watch.get('previous_md5'):
            watch['previous_md5'] = fetched_md5

-        # stripped_text_from_html - Everything after filters and NO 'ignored' content
-        return changed_detected, update_obj, stripped_text_from_html
+        return changed_detected, update_obj, text_content_before_ignored_filter, stripped_text_from_html
@@ -16,31 +16,25 @@ echo "---------------------------------- SOCKS5 -------------------"
 docker run --network changedet-network \
  -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
  --rm \
-  -e "FLASK_SERVER_NAME=cdio" \
-  --hostname cdio \
  -e "SOCKSTEST=proxiesjson" \
  test-changedetectionio \
-  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py'
+  bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'

 # SOCKS5 related - by manually entering in UI
 docker run --network changedet-network \
  --rm \
-  -e "FLASK_SERVER_NAME=cdio" \
-  --hostname cdio \
  -e "SOCKSTEST=manual" \
  test-changedetectionio \
-  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy.py'
+  bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py'

 # SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
 docker run --network changedet-network \
  -e "SOCKSTEST=manual-playwright" \
-  --hostname cdio \
-  -e "FLASK_SERVER_NAME=cdio" \
  -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
  -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
  --rm \
  test-changedetectionio \
-  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py'
+  bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'

 echo "socks5 server logs"
 docker logs socks5proxy
@@ -0,0 +1,56 @@
+/**
+ * debounce
+ * @param {integer} milliseconds This param indicates the number of milliseconds
+ *     to wait after the last call before calling the original function.
+ * @param {object} context What "this" refers to in the returned function.
+ * @return {function} This returns a function that when called will wait the
+ *     indicated number of milliseconds after the last call before
+ *     calling the original function.
+ */
+Function.prototype.debounce = function (milliseconds, context) {
+    var baseFunction = this,
+        timer = null,
+        wait = milliseconds;
+
+    return function () {
+        var self = context || this,
+            args = arguments;
+
+        function complete() {
+            baseFunction.apply(self, args);
+            timer = null;
+        }
+
+        if (timer) {
+            clearTimeout(timer);
+        }
+
+        timer = setTimeout(complete, wait);
+    };
+};
+
+/**
+* throttle
+* @param {integer} milliseconds This param indicates the number of milliseconds
+*     to wait between calls before calling the original function.
+* @param {object} context What "this" refers to in the returned function.
+* @return {function} This returns a function that when called will wait the
+*     indicated number of milliseconds between calls before
+*     calling the original function.
+*/
+Function.prototype.throttle = function (milliseconds, context) {
+    var baseFunction = this,
+        lastEventTimestamp = null,
+        limit = milliseconds;
+
+    return function () {
+        var self = context || this,
+            args = arguments,
+            now = Date.now();
+
+        if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
+            lastEventTimestamp = now;
+            baseFunction.apply(self, args);
+        }
+    };
+};
@@ -1,162 +0,0 @@
-(function ($) {
-    /**
-     * debounce
-     * @param {integer} milliseconds This param indicates the number of milliseconds
-     *     to wait after the last call before calling the original function.
-     * @param {object} What "this" refers to in the returned function.
-     * @return {function} This returns a function that when called will wait the
-     *     indicated number of milliseconds after the last call before
-     *     calling the original function.
-     */
-    Function.prototype.debounce = function (milliseconds, context) {
-        var baseFunction = this,
-            timer = null,
-            wait = milliseconds;
-
-        return function () {
-            var self = context || this,
-                args = arguments;
-
-            function complete() {
-                baseFunction.apply(self, args);
-                timer = null;
-            }
-
-            if (timer) {
-                clearTimeout(timer);
-            }
-
-            timer = setTimeout(complete, wait);
-        };
-    };
-
-    /**
-     * throttle
-     * @param {integer} milliseconds This param indicates the number of milliseconds
-     *     to wait between calls before calling the original function.
-     * @param {object} What "this" refers to in the returned function.
-     * @return {function} This returns a function that when called will wait the
-     *     indicated number of milliseconds between calls before
-     *     calling the original function.
-     */
-    Function.prototype.throttle = function (milliseconds, context) {
-        var baseFunction = this,
-            lastEventTimestamp = null,
-            limit = milliseconds;
-
-        return function () {
-            var self = context || this,
-                args = arguments,
-                now = Date.now();
-
-            if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
-                lastEventTimestamp = now;
-                baseFunction.apply(self, args);
-            }
-        };
-    };
-
-    $.fn.highlightLines = function (configurations) {
-        return this.each(function () {
-            const $pre = $(this);
-            const textContent = $pre.text();
-            const lines = textContent.split(/\r?\n/); // Handles both \n and \r\n line endings
-
-            // Build a map of line numbers to styles
-            const lineStyles = {};
-
-            configurations.forEach(config => {
-                const {color, lines: lineNumbers} = config;
-                lineNumbers.forEach(lineNumber => {
-                    lineStyles[lineNumber] = color;
-                });
-            });
-
-            // Function to escape HTML characters
-            function escapeHtml(text) {
-                return text.replace(/[&<>"'`=\/]/g, function (s) {
-                    return "&#" + s.charCodeAt(0) + ";";
-                });
-            }
-
-            // Process each line
-            const processedLines = lines.map((line, index) => {
-                const lineNumber = index + 1; // Line numbers start at 1
-                const escapedLine = escapeHtml(line);
-                const color = lineStyles[lineNumber];
-
-                if (color) {
-                    // Wrap the line in a span with inline style
-                    return `<span style="background-color: ${color}">${escapedLine}</span>`;
-                } else {
-                    return escapedLine;
-                }
-            });
-
-            // Join the lines back together
-            const newContent = processedLines.join('\n');
-
-            // Set the new content as HTML
-            $pre.html(newContent);
-        });
-    };
-    $.fn.miniTabs = function (tabsConfig, options) {
-        const settings = {
-            tabClass: 'minitab',
-            tabsContainerClass: 'minitabs',
-            activeClass: 'active',
-            ...(options || {})
-        };
-
-        return this.each(function () {
-            const $wrapper = $(this);
-            const $contents = $wrapper.find('div[id]').hide();
-            const $tabsContainer = $('<div>', {class: settings.tabsContainerClass}).prependTo($wrapper);
-
-            // Generate tabs
-            Object.entries(tabsConfig).forEach(([tabTitle, contentSelector], index) => {
-                const $content = $wrapper.find(contentSelector);
-                if (index === 0) $content.show(); // Show first content by default
-
-                $('<a>', {
-                    class: `${settings.tabClass}${index === 0 ? ` ${settings.activeClass}` : ''}`,
-                    text: tabTitle,
-                    'data-target': contentSelector
-                }).appendTo($tabsContainer);
-            });
-
-            // Tab click event
-            $tabsContainer.on('click', `.${settings.tabClass}`, function (e) {
-                e.preventDefault();
-                const $tab = $(this);
-                const target = $tab.data('target');
-
-                // Update active tab
-                $tabsContainer.find(`.${settings.tabClass}`).removeClass(settings.activeClass);
-                $tab.addClass(settings.activeClass);
-
-                // Show/hide content
-                $contents.hide();
-                $wrapper.find(target).show();
-            });
-        });
-    };
-
-    // Object to store ongoing requests by namespace
-    const requests = {};
-
-    $.abortiveSingularAjax = function (options) {
-        const namespace = options.namespace || 'default';
-
-        // Abort the current request in this namespace if it's still ongoing
-        if (requests[namespace]) {
-            requests[namespace].abort();
-        }
-
-        // Start a new AJAX request and store its reference in the correct namespace
-        requests[namespace] = $.ajax(options);
-
-        // Return the current request in case it's needed
-        return requests[namespace];
-    };
-})(jQuery);
@@ -1,63 +1,53 @@
-function redirectToVersion(version) {
-    var currentUrl = window.location.href.split('?')[0]; // Base URL without query parameters
+function redirect_to_version(version) {
+    var currentUrl = window.location.href;
+    var baseUrl = currentUrl.split('?')[0]; // Base URL without query parameters
    var anchor = '';

    // Check if there is an anchor
-    if (currentUrl.indexOf('#') !== -1) {
-        anchor = currentUrl.substring(currentUrl.indexOf('#'));
-        currentUrl = currentUrl.substring(0, currentUrl.indexOf('#'));
+    if (baseUrl.indexOf('#') !== -1) {
+        anchor = baseUrl.substring(baseUrl.indexOf('#'));
+        baseUrl = baseUrl.substring(0, baseUrl.indexOf('#'));
    }
-
-    window.location.href = currentUrl + '?version=' + version + anchor;
+    window.location.href = baseUrl + '?version=' + version + anchor;
 }

-function setupDateWidget() {
-    $(document).on('keydown', function (event) {
-        var $selectElement = $('#preview-version');
-        var $selectedOption = $selectElement.find('option:selected');
-
-        if ($selectedOption.length) {
-            if (event.key === 'ArrowLeft' && $selectedOption.prev().length) {
-                redirectToVersion($selectedOption.prev().val());
-            } else if (event.key === 'ArrowRight' && $selectedOption.next().length) {
-                redirectToVersion($selectedOption.next().val());
+document.addEventListener('keydown', function (event) {
+    var selectElement = document.getElementById('preview-version');
+    if (selectElement) {
+        var selectedOption = selectElement.querySelector('option:checked');
+        if (selectedOption) {
+            if (event.key === 'ArrowLeft') {
+                if (selectedOption.previousElementSibling) {
+                    redirect_to_version(selectedOption.previousElementSibling.value);
+                }
+            } else if (event.key === 'ArrowRight') {
+                if (selectedOption.nextElementSibling) {
+                    redirect_to_version(selectedOption.nextElementSibling.value);
+                }
            }
        }
-    });
+    }
+});

-    $('#preview-version').on('change', function () {
-        redirectToVersion($(this).val());
-    });

-    var $selectedOption = $('#preview-version option:selected');
+document.getElementById('preview-version').addEventListener('change', function () {
+    redirect_to_version(this.value);
+});

-    if ($selectedOption.length) {
-        var $prevOption = $selectedOption.prev();
-        var $nextOption = $selectedOption.next();
-
-        if ($prevOption.length) {
-            $('#btn-previous').attr('href', '?version=' + $prevOption.val());
+var selectElement = document.getElementById('preview-version');
+if (selectElement) {
+    var selectedOption = selectElement.querySelector('option:checked');
+    if (selectedOption) {
+        if (selectedOption.previousElementSibling) {
+            document.getElementById('btn-previous').href = "?version=" + selectedOption.previousElementSibling.value;
        } else {
-            $('#btn-previous').remove();
+            document.getElementById('btn-previous').remove()
+        }
+        if (selectedOption.nextElementSibling) {
+            document.getElementById('btn-next').href = "?version=" + selectedOption.nextElementSibling.value;
+        } else {
+            document.getElementById('btn-next').remove()
        }

-        if ($nextOption.length) {
-            $('#btn-next').attr('href', '?version=' + $nextOption.val());
-        } else {
-            $('#btn-next').remove();
-        }
    }
 }
-
-$(document).ready(function () {
-    if ($('#preview-version').length) {
-        setupDateWidget();
-    }
-
-    $('#diff-col > pre').highlightLines([
-        {
-            'color': '#ee0000',
-            'lines': triggered_line_numbers
-        }
-    ]);
-});
@@ -49,9 +49,4 @@ $(document).ready(function () {
        $("#overlay").toggleClass('visible');
        heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)';
    });
-
-    setInterval(function () {
-        $('body').toggleClass('spinner-active', $.active > 0);
-    }, 2000);
-
 });
@@ -12,38 +12,41 @@ function toggleOpacity(checkboxSelector, fieldSelector, inverted) {
    checkbox.addEventListener('change', updateOpacity);
 }

+(function($) {
+    // Object to store ongoing requests by namespace
+    const requests = {};
+
+    $.abortiveSingularAjax = function(options) {
+        const namespace = options.namespace || 'default';
+
+        // Abort the current request in this namespace if it's still ongoing
+        if (requests[namespace]) {
+            requests[namespace].abort();
+        }
+
+        // Start a new AJAX request and store its reference in the correct namespace
+        requests[namespace] = $.ajax(options);
+
+        // Return the current request in case it's needed
+        return requests[namespace];
+    };
+})(jQuery);

 function request_textpreview_update() {
-    if (!$('body').hasClass('preview-text-enabled')) {
-        console.error("Preview text was requested but body tag was not setup")
-        return
-    }
-
    const data = {};
    $('textarea:visible, input:visible').each(function () {
        const $element = $(this); // Cache the jQuery object for the current element
        const name = $element.attr('name'); // Get the name attribute of the element
-        data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
+        data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : undefined) : $element.val();
    });

-    $('body').toggleClass('spinner-active', 1);
-
    $.abortiveSingularAjax({
        type: "POST",
        url: preview_text_edit_filters_url,
        data: data,
        namespace: 'watchEdit'
    }).done(function (data) {
-        console.debug(data['duration'])
-        $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']);
-        $('#filters-and-triggers #text-preview-inner')
-            .text(data['after_filter'])
-            .highlightLines([
-                {
-                    'color': '#ee0000',
-                    'lines': data['trigger_line_numbers']
-                }
-            ]);
+        $('#filters-and-triggers #text-preview-inner').text(data);
    }).fail(function (error) {
        if (error.statusText === 'abort') {
            console.log('Request was aborted due to a new request being fired.');
@@ -70,19 +73,20 @@ $(document).ready(function () {

    const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
    $("#text-preview-inner").css('max-height', (vh-300)+"px");
-    $("#text-preview-before-inner").css('max-height', (vh-300)+"px");

    $("#activate-text-preview").click(function (e) {
+        $(this).fadeOut();
        $('body').toggleClass('preview-text-enabled')
+
        request_textpreview_update();
-        const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
-        $('textarea:visible')[method]('keyup blur', request_textpreview_update.throttle(1000));
-        $('input:visible')[method]('keyup blur change', request_textpreview_update.throttle(1000));
-        $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000));
-    });
-    $('.minitabs-wrapper').miniTabs({
-        "Content after filters": "#text-preview-inner",
-        "Content raw/before filters": "#text-preview-before-inner"
+        $("#text-preview-refresh").click(function (e) {
+            request_textpreview_update();
+        });
+        $('textarea:visible, input:visible').on('keyup keypress blur change click', function (e) {
+            request_textpreview_update();
+        });
+
    });
+
 });

@@ -40,29 +40,15 @@
  }
 }

+#browser-steps-fieldlist {
+  height: 100%;
+  overflow-y: scroll;
+}

 #browser-steps .flex-wrapper {
  display: flex;
  flex-flow: row;
  height: 70vh;
-  font-size: 80%;
-  #browser-steps-ui {
-    flex-grow: 1;      /* Allow it to grow and fill the available space */
-    flex-shrink: 1;    /* Allow it to shrink if needed */
-    flex-basis: 0;     /* Start with 0 base width so it stretches as much as possible */
-    background-color: #eee;
-    border-radius: 5px;
-
-  }
-
-  #browser-steps-fieldlist {
-    flex-grow: 0;      /* Don't allow it to grow */
-    flex-shrink: 0;    /* Don't allow it to shrink */
-    flex-basis: auto;  /* Base width is determined by the content */
-    max-width: 400px;  /* Set a max width to prevent overflow */
-    padding-left: 1rem;
-    overflow-y: scroll;
-  }
 }

 /*  this is duplicate :( */
@@ -1,47 +0,0 @@
-.minitabs-wrapper {
-  width: 100%;
-
-  > div[id] {
-    padding: 20px;
-    border: 1px solid #ccc;
-    border-top: none;
-  }
-
-  .minitabs-content {
-    width: 100%;
-    display: flex;
-    > div {
-      flex: 1 1 auto;
-      min-width: 0;
-      overflow: scroll;
-    }
-  }
-
-  .minitabs {
-    display: flex;
-    border-bottom: 1px solid #ccc;
-  }
-
-  .minitab {
-    flex: 1;
-    text-align: center;
-    padding: 12px 0;
-    text-decoration: none;
-    color: #333;
-    background-color: #f1f1f1;
-    border: 1px solid #ccc;
-    border-bottom: none;
-    cursor: pointer;
-    transition: background-color 0.3s;
-  }
-
-  .minitab:hover {
-    background-color: #ddd;
-  }
-
-  .minitab.active {
-    background-color: #fff;
-    font-weight: bold;
-  }
-
-}
@@ -1,13 +1,8 @@
-@import "minitabs";
-
 body.preview-text-enabled {
-
-  @media (min-width: 800px) {
-    #filters-and-triggers > div {
-      display: flex; /* Establishes Flexbox layout */
-      gap: 20px; /* Adds space between the columns */
-      position: relative; /* Ensures the sticky positioning is relative to this parent */
-    }
+  #filters-and-triggers > div {
+    display: flex; /* Establishes Flexbox layout */
+    gap: 20px; /* Adds space between the columns */
+    position: relative; /* Ensures the sticky positioning is relative to this parent */
  }

  /* layout of the page */
@@ -24,32 +19,27 @@ body.preview-text-enabled {

  #text-preview {
    position: sticky;
-    top: 20px;
-    padding-top: 1rem;
-    padding-bottom: 1rem;
+    top: 25px;
    display: block !important;
  }

-  #activate-text-preview {
-      background-color: var(--color-grey-500);
-  }
-
  /* actual preview area */
-  .monospace-preview {
-    background: var(--color-background-input);
+  #text-preview-inner {
+    background: var(--color-grey-900);
    border: 1px solid var(--color-grey-600);
    padding: 1rem;
-    color: var(--color-text-input);
+    color: #333;
    font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */
-    font-size: 70%;
-    word-break: break-word;
+    font-size: 12px;
+    overflow-x: scroll;
    white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */
+    overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */
  }
 }

 #activate-text-preview {
  right: 0;
  position: absolute;
-  z-index: 3;
+  z-index: 0;
  box-shadow: 1px 1px 4px var(--color-shadow-jump);
 }
@@ -106,34 +106,10 @@ button.toggle-button {
  padding: 5px;
  display: flex;
  justify-content: space-between;
+  border-bottom: 2px solid var(--color-menu-accent);
  align-items: center;
 }

-#pure-menu-horizontal-spinner {
-  height: 3px;
-  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
-  background-size: 400% 400%;
-  width: 100%;
-  animation: gradient 200s ease infinite;
-}
-
-body.spinner-active {
-  #pure-menu-horizontal-spinner {
-    animation: gradient 1s ease infinite;
-  }
-}
-
-@keyframes gradient {
-	0% {
-		background-position: 0% 50%;
-	}
-	50% {
-		background-position: 100% 50%;
-	}
-	100% {
-		background-position: 0% 50%;
-	}
-}
 .pure-menu-heading {
  color: var(--color-text-menu-heading);
 }
@@ -345,6 +321,10 @@ a.pure-button-selected {
  background: var(--color-background-button-cancel);
 }

+#save_button {
+  margin-right: 1rem;
+}
+
 .messages {
  li {
    list-style: none;
@@ -641,9 +621,9 @@ footer {
      list-style: none;

      li {
-        display: flex;
-        align-items: center;
-        gap: 1em;
+        >* {
+          display: inline-block;
+        }
      }
    }
  }
@@ -703,12 +683,6 @@ footer {
      tr {
        th {
          display: inline-block;
-          // Hide the "Last" text for smaller screens
-          @media (max-width: 768px) {
-            .hide-on-mobile {
-              display: none; 
-            }
-          }
        }
      }
      .empty-cell {
@@ -724,24 +698,6 @@ footer {
      }
    }

-    tbody {
-      tr {
-        display: flex;
-        flex-wrap: wrap;
-
-        // The third child of each row will take up the remaining space
-        // This is useful for the URL column, which should expand to fill the remaining space
-        :nth-child(3) {
-          flex-grow: 1;
-        }
-        // The last three children (from the end) of each row will take up the full width
-        // This is useful for the "Last Checked", "Last Changed", and the action buttons columns, which should each take up the full width
-        :nth-last-child(-n+3) {
-          flex-basis: 100%;
-        }
-      }
-    }
-
    .last-checked {
      >span {
        vertical-align: middle;
@@ -860,11 +816,6 @@ textarea::placeholder {
 - We dont use 'size' with <input> because `size` is too unreliable to override, and will often push-out
 - Rely always on width in CSS
 */
-/** Set max width for input field */
-.m-d {
-  min-width: 100%;
-}
-
@media only screen and (min-width: 761px) {

  /* m-d is medium-desktop */
@@ -980,13 +931,6 @@ body.full-width {
    background: var(--color-background);
  }

-  /* Make action buttons have consistent size and spacing */
-  #actions .pure-control-group {
-    display: flex;
-    gap: 0.625em;
-    flex-wrap: wrap;
-  }
-
  .pure-form-message-inline {
    padding-left: 0;
    color: var(--color-text-input-description);
@@ -1030,28 +974,6 @@ ul {
  }
 }

-@media only screen and (max-width: 760px) {
-  .time-check-widget {
-    tbody {
-      display: grid;
-      grid-template-columns: auto 1fr auto 1fr;
-      gap: 0.625em 0.3125em;
-      align-items: center;
-    }    
-    tr {
-      display: contents; 
-      th {
-        text-align: right;
-        padding-right: 5px;
-      }
-      input[type="number"] {
-        width: 100%;
-        max-width: 5em;
-      }
-    }
-  }
-}
-
@import "parts/_visualselector";

 #webdriver_delay {
@@ -46,31 +46,14 @@
    #browser_steps li > label {
      display: none; }

+#browser-steps-fieldlist {
+  height: 100%;
+  overflow-y: scroll; }
+
 #browser-steps .flex-wrapper {
  display: flex;
  flex-flow: row;
-  height: 70vh;
-  font-size: 80%; }
-  #browser-steps .flex-wrapper #browser-steps-ui {
-    flex-grow: 1;
-    /* Allow it to grow and fill the available space */
-    flex-shrink: 1;
-    /* Allow it to shrink if needed */
-    flex-basis: 0;
-    /* Start with 0 base width so it stretches as much as possible */
-    background-color: #eee;
-    border-radius: 5px; }
-  #browser-steps .flex-wrapper #browser-steps-fieldlist {
-    flex-grow: 0;
-    /* Don't allow it to grow */
-    flex-shrink: 0;
-    /* Don't allow it to shrink */
-    flex-basis: auto;
-    /* Base width is determined by the content */
-    max-width: 400px;
-    /* Set a max width to prevent overflow */
-    padding-left: 1rem;
-    overflow-y: scroll; }
+  height: 70vh; }

 /*  this is duplicate :( */
 #browsersteps-selector-wrapper {
@@ -428,50 +411,16 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark {
    fill: #ff0000 !important;
    transition: all ease 0.3s !important; }

-.minitabs-wrapper {
-  width: 100%; }
-  .minitabs-wrapper > div[id] {
-    padding: 20px;
-    border: 1px solid #ccc;
-    border-top: none; }
-  .minitabs-wrapper .minitabs-content {
-    width: 100%;
-    display: flex; }
-    .minitabs-wrapper .minitabs-content > div {
-      flex: 1 1 auto;
-      min-width: 0;
-      overflow: scroll; }
-  .minitabs-wrapper .minitabs {
-    display: flex;
-    border-bottom: 1px solid #ccc; }
-  .minitabs-wrapper .minitab {
-    flex: 1;
-    text-align: center;
-    padding: 12px 0;
-    text-decoration: none;
-    color: #333;
-    background-color: #f1f1f1;
-    border: 1px solid #ccc;
-    border-bottom: none;
-    cursor: pointer;
-    transition: background-color 0.3s; }
-  .minitabs-wrapper .minitab:hover {
-    background-color: #ddd; }
-  .minitabs-wrapper .minitab.active {
-    background-color: #fff;
-    font-weight: bold; }
-
 body.preview-text-enabled {
  /* layout of the page */
  /* actual preview area */ }
-  @media (min-width: 800px) {
-    body.preview-text-enabled #filters-and-triggers > div {
-      display: flex;
-      /* Establishes Flexbox layout */
-      gap: 20px;
-      /* Adds space between the columns */
-      position: relative;
-      /* Ensures the sticky positioning is relative to this parent */ } }
+  body.preview-text-enabled #filters-and-triggers > div {
+    display: flex;
+    /* Establishes Flexbox layout */
+    gap: 20px;
+    /* Adds space between the columns */
+    position: relative;
+    /* Ensures the sticky positioning is relative to this parent */ }
  body.preview-text-enabled #edit-text-filter, body.preview-text-enabled #text-preview {
    flex: 1;
    /* Each column takes an equal amount of available space */
@@ -481,28 +430,26 @@ body.preview-text-enabled {
    display: none; }
  body.preview-text-enabled #text-preview {
    position: sticky;
-    top: 20px;
-    padding-top: 1rem;
-    padding-bottom: 1rem;
+    top: 25px;
    display: block !important; }
-  body.preview-text-enabled #activate-text-preview {
-    background-color: var(--color-grey-500); }
-  body.preview-text-enabled .monospace-preview {
-    background: var(--color-background-input);
+  body.preview-text-enabled #text-preview-inner {
+    background: var(--color-grey-900);
    border: 1px solid var(--color-grey-600);
    padding: 1rem;
-    color: var(--color-text-input);
+    color: #333;
    font-family: "Courier New", Courier, monospace;
    /* Sets the font to a monospace type */
-    font-size: 70%;
-    word-break: break-word;
+    font-size: 12px;
+    overflow-x: scroll;
    white-space: pre-wrap;
-    /* Preserves whitespace and line breaks like <pre> */ }
+    /* Preserves whitespace and line breaks like <pre> */
+    overflow-wrap: break-word;
+    /* Allows long words to break and wrap to the next line */ }

 #activate-text-preview {
  right: 0;
  position: absolute;
-  z-index: 3;
+  z-index: 0;
  box-shadow: 1px 1px 4px var(--color-shadow-jump); }

 body {
@@ -573,26 +520,9 @@ button.toggle-button {
  padding: 5px;
  display: flex;
  justify-content: space-between;
+  border-bottom: 2px solid var(--color-menu-accent);
  align-items: center; }

-#pure-menu-horizontal-spinner {
-  height: 3px;
-  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
-  background-size: 400% 400%;
-  width: 100%;
-  animation: gradient 200s ease infinite; }
-
-body.spinner-active #pure-menu-horizontal-spinner {
-  animation: gradient 1s ease infinite; }
-
-@keyframes gradient {
-  0% {
-    background-position: 0% 50%; }
-  50% {
-    background-position: 100% 50%; }
-  100% {
-    background-position: 0% 50%; } }
-
 .pure-menu-heading {
  color: var(--color-text-menu-heading); }

@@ -745,6 +675,9 @@ a.pure-button-selected {
 .button-cancel {
  background: var(--color-background-button-cancel); }

+#save_button {
+  margin-right: 1rem; }
+
 .messages li {
  list-style: none;
  padding: 1em;
@@ -943,10 +876,8 @@ footer {
  .pure-form .inline-radio ul {
    margin: 0px;
    list-style: none; }
-    .pure-form .inline-radio ul li {
-      display: flex;
-      align-items: center;
-      gap: 1em; }
+    .pure-form .inline-radio ul li > * {
+      display: inline-block; }

@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
  .box {
@@ -982,24 +913,12 @@ footer {
    .watch-table thead {
      display: block; }
      .watch-table thead tr th {
-        display: inline-block; } }
-      @media only screen and (max-width: 760px) and (max-width: 768px), (min-device-width: 768px) and (max-device-width: 800px) and (max-width: 768px) {
-        .watch-table thead tr th .hide-on-mobile {
-          display: none; } }
-
-@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 800px) {
+        display: inline-block; }
      .watch-table thead .empty-cell {
        display: none; }
    .watch-table tbody td,
    .watch-table tbody tr {
      display: block; }
-    .watch-table tbody tr {
-      display: flex;
-      flex-wrap: wrap; }
-      .watch-table tbody tr :nth-child(3) {
-        flex-grow: 1; }
-      .watch-table tbody tr :nth-last-child(-n+3) {
-        flex-basis: 100%; }
    .watch-table .last-checked > span {
      vertical-align: middle; }
    .watch-table .last-checked::before {
@@ -1091,10 +1010,6 @@ textarea::placeholder {
 - We dont use 'size' with <input> because `size` is too unreliable to override, and will often push-out
 - Rely always on width in CSS
 */
-/** Set max width for input field */
-.m-d {
-  min-width: 100%; }
-
@media only screen and (min-width: 761px) {
  /* m-d is medium-desktop */
  .m-d {
@@ -1155,8 +1070,7 @@ body.full-width .edit-form {
 .edit-form {
  min-width: 70%;
  /* so it cant overflow */
-  max-width: 95%;
-  /* Make action buttons have consistent size and spacing */ }
+  max-width: 95%; }
  .edit-form .box-wrap {
    position: relative; }
  .edit-form .inner {
@@ -1165,10 +1079,6 @@ body.full-width .edit-form {
  .edit-form #actions {
    display: block;
    background: var(--color-background); }
-  .edit-form #actions .pure-control-group {
-    display: flex;
-    gap: 0.625em;
-    flex-wrap: wrap; }
  .edit-form .pure-form-message-inline {
    padding-left: 0;
    color: var(--color-text-input-description); }
@@ -1197,21 +1107,6 @@ ul {
  .time-check-widget tr input[type="number"] {
    width: 5em; }

-@media only screen and (max-width: 760px) {
-  .time-check-widget tbody {
-    display: grid;
-    grid-template-columns: auto 1fr auto 1fr;
-    gap: 0.625em 0.3125em;
-    align-items: center; }
-  .time-check-widget tr {
-    display: contents; }
-    .time-check-widget tr th {
-      text-align: right;
-      padding-right: 5px; }
-    .time-check-widget tr input[type="number"] {
-      width: 100%;
-      max-width: 5em; } }
-
 #selector-wrapper {
  height: 100%;
  text-align: center;
@@ -15,7 +15,7 @@
                                <strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
 </p>
                                <div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
-                                <ul style="display: none" id="advanced-help-notifications">
+                              <ul style="display: none" id="advanced-help-notifications">
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@@ -35,9 +35,7 @@

  <body class="">
    <div class="header">
-    <div class="pure-menu-fixed" style="width: 100%;">
-      <div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">
-
+      <div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu">
        {% if has_password and not current_user.is_authenticated %}
          <a class="pure-menu-heading" href="https://changedetection.io" rel="noopener">
            <strong>Change</strong>Detection.io</a>
@@ -131,12 +129,7 @@
          </li>
        </ul>
      </div>
-      <div id="pure-menu-horizontal-spinner"></div>
-      </div>
-
    </div>
-
-
    {% if hosted_sticky %}
      <div class="sticky-tab" id="hosted-sticky">
        <a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a>
@@ -24,7 +24,7 @@
    const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
    const default_system_fetch_backend="{{ settings_application['fetch_backend'] }}";
 </script>
-<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
+
 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
@@ -50,7 +50,7 @@
            {% endif %}
            {% if watch['processor'] == 'text_json_diff' %}
            <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
-            <li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
+            <li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
            {% endif %}
            <li class="tab"><a href="#notifications">Notifications</a></li>
            <li class="tab"><a href="#stats">Stats</a></li>
@@ -200,7 +200,7 @@ User-Agent: wonderbra 1.0") }}
                        <div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div>
                        <div class="flex-wrapper" >

-                            <div id="browser-steps-ui" class="noselect">
+                            <div id="browser-steps-ui" class="noselect"  style="width: 100%; background-color: #eee; border-radius: 5px;">

                                <div class="noselect"  id="browsersteps-selector-wrapper" style="width: 100%">
                                    <span class="loader" >
@@ -215,7 +215,7 @@ User-Agent: wonderbra 1.0") }}
                                    <canvas  class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas>
                                </div>
                            </div>
-                            <div id="browser-steps-fieldlist" >
+                            <div id="browser-steps-fieldlist" style="padding-left: 1em;  width: 350px; font-size: 80%;" >
                                <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
                                {{ render_field(form.browser_steps) }}
                            </div>
@@ -313,13 +313,12 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                    {{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
 footer
 nav
-.stockticker
-//*[contains(text(), 'Advertisement')]") }}
+.stockticker") }}
                    <span class="pure-form-message-inline">
                        <ul>
-                          <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
-                          <li> Don't paste HTML here, use only CSS and XPath selectors </li>
-                          <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
+                          <li> Remove HTML element(s) by CSS selector before text conversion. </li>
+                          <li> Don't paste HTML here, use only CSS selectors </li>
+                          <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
                        </ul>
                      </span>
                </fieldset>
@@ -371,10 +370,10 @@ nav
 ") }}
                    <span class="pure-form-message-inline">
                        <ul>
-                            <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
+                            <li>Use the preview/show current tab to see ignores</li>
                        </ul>
                </span>

@@ -398,9 +397,7 @@ Unavailable") }}
                </fieldset>
                <fieldset>
                    <div class="pure-control-group">
-                        {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
- or
-keyword") }}
+                        {{ render_field(form.extract_text, rows=5, placeholder="Example: /\d+ online/") }}
                        <span class="pure-form-message-inline">
                    <ul>
                        <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
@@ -424,22 +421,14 @@ keyword") }}
                    <script>
                        const preview_text_edit_filters_url="{{url_for('watch_get_preview_rendered', uuid=uuid)}}";
                    </script>
-                    <br>
+                    <span><strong>Preview of the text that is used for changedetection after all filters run.</strong></span><br>
                    {#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
-                    <div class="minitabs-wrapper">
-                      <div class="minitabs-content">
-                          <div id="text-preview-inner" class="monospace-preview">
-                              <p>Loading...</p>
-                          </div>
-                          <div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
-                              <p>Loading...</p>
-                          </div>
-                      </div>
-                    </div>
+                <p>
+                    <div id="text-preview-inner"></div>
+                </p>
            </div>
          </div>
        </div>
-
        {% endif %}
        {# rendered sub Template #}
        {% if extra_form_content %}
@@ -3,13 +3,11 @@
 {% block content %}
    <script>
        const screenshot_url = "{{url_for('static_content', group='screenshot', filename=uuid)}}";
-        const triggered_line_numbers = {{ triggered_line_numbers|tojson }};
        {% if last_error_screenshot %}
            const error_screenshot_url = "{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
        {% endif %}
        const highlight_submit_ignore_url = "{{url_for('highlight_submit_ignore_url', uuid=uuid)}}";
    </script>
-    <script src="{{url_for('static_content', group='js', filename='plugins.js')}}"></script>
    <script src="{{ url_for('static_content', group='js', filename='diff-overview.js') }}" defer></script>
    <script src="{{ url_for('static_content', group='js', filename='preview.js') }}" defer></script>
    <script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
@@ -69,15 +67,16 @@

        <div class="tab-pane-inner" id="text">
            <div class="snapshot-age">{{ current_version|format_timestamp_timeago }}</div>
+            <span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
            <span class="tip"><strong>Pro-tip</strong>: Highlight text to add to ignore filters</span>

            <table>
                <tbody>
                <tr>
                    <td id="diff-col" class="highlightable-filter">
-                        <pre style="border-left: 2px solid #ddd;">
-{{ content }}
-                        </pre>
+                        {% for row in content %}
+                            <div class="{{ row.classes }}">{{ row.line }}</div>
+                        {% endfor %}
                    </td>
                </tr>
                </tbody>
@@ -155,13 +155,11 @@
                      {{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header
 footer
 nav
-.stockticker
-//*[contains(text(), 'Advertisement')]") }}
+.stockticker") }}
                      <span class="pure-form-message-inline">
                        <ul>
-                          <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
-                          <li> Don't paste HTML here, use only CSS and XPath selectors </li>
-                          <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
+                          <li> Remove HTML element(s) by CSS selector before text conversion. </li>
+                          <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
                        </ul>
                      </span>
                    </fieldset>
@@ -172,11 +170,11 @@ nav
                    <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
                    <span class="pure-form-message-inline">
                        <ul>
-                            <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
                            <li>Note: This is applied globally in addition to the per-watch rules.</li>
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
+                            <li>Use the preview/show current tab to see ignores</li>
                        </ul>
                     </span>
                    </fieldset>
@@ -78,8 +78,8 @@
             {% if any_has_restock_price_processor %}
                <th>Restock &amp; Price</th>
             {% endif %}
-                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
-                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
+                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
+                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
                <th class="empty-cell"></th>
            </tr>
            </thead>
@@ -191,9 +191,9 @@
                    {% if watch.history_n >= 2 %}

                        {%  if is_unviewed %}
-                           <a href="{{ url_for('diff_history_page', uuid=watch.uuid, from_version=watch.get_next_snapshot_key_to_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
+                           <a href="{{ url_for('diff_history_page', uuid=watch.uuid, from_version=watch.get_next_snapshot_key_to_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">Diff</a>
                        {% else %}
-                           <a href="{{ url_for('diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
+                           <a href="{{ url_for('diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">Diff</a>
                        {% endif %}

                    {% else %}
@@ -44,7 +44,7 @@ def test_select_custom(client, live_server, measure_memory_usage):
        follow_redirects=True
    )
    # We should see something via proxy
-    assert b' - 0.' in res.data
+    assert b'<div class=""> - 0.' in res.data

    #
    # Now we should see the request in the container logs for "squid-squid-custom" because it will be the only default
@@ -1,27 +1,12 @@
 #!/usr/bin/env python3
 import os
+import time
 from flask import url_for
 from changedetectionio.tests.util import live_server_setup, wait_for_all_checks


-def set_response():
-    import time
-    data = f"""<html>
-       <body>
-     <h1>Awesome, you made it</h1>
-     yeah the socks request worked
-     </body>
-     </html>
-    """
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write(data)
-    time.sleep(1)
-
-
 def test_socks5(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
-    set_response()

    # Setup a proxy
    res = client.post(
@@ -39,10 +24,7 @@ def test_socks5(client, live_server, measure_memory_usage):

    assert b"Settings updated." in res.data

-    # Because the socks server should connect back to us
-    test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
-    test_url = test_url.replace('localhost.localdomain', 'cdio')
-    test_url = test_url.replace('localhost', 'cdio')
+    test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')

    res = client.post(
        url_for("form_quick_watch_add"),
@@ -78,4 +60,4 @@ def test_socks5(client, live_server, measure_memory_usage):
    )

    # Should see the proper string
-    assert "Awesome, you made it".encode('utf-8') in res.data
+    assert "+0200:".encode('utf-8') in res.data
@@ -1,32 +1,16 @@
 #!/usr/bin/env python3
 import os
+import time
 from flask import url_for
 from changedetectionio.tests.util import live_server_setup, wait_for_all_checks


-def set_response():
-    import time
-    data = f"""<html>
-       <body>
-     <h1>Awesome, you made it</h1>
-     yeah the socks request worked
-     </body>
-     </html>
-    """
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write(data)
-    time.sleep(1)
-
 # should be proxies.json mounted from run_proxy_tests.sh already
 # -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
 def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
-    set_response()
-    # Because the socks server should connect back to us
-    test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
-    test_url = test_url.replace('localhost.localdomain', 'cdio')
-    test_url = test_url.replace('localhost', 'cdio')
+
+    test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')

    res = client.get(url_for("settings_page"))
    assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data
@@ -65,4 +49,4 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
    )

    # Should see the proper string
-    assert "Awesome, you made it".encode('utf-8') in res.data
+    assert "+0200:".encode('utf-8') in res.data
@@ -39,8 +39,9 @@ def test_setup(client, live_server, measure_memory_usage):
    live_server_setup(live_server)

 def test_check_removed_line_contains_trigger(client, live_server, measure_memory_usage):
-    #live_server_setup(live_server)
+
    # Give the endpoint time to spin up
+    time.sleep(1)
    set_original()
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
@@ -151,9 +152,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa

    # A line thats not the trigger should not trigger anything
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    assert b'1 watches queued for rechecking.' in res.data
-
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
@@ -87,9 +87,6 @@ def test_element_removal_output():
     Some initial text<br>
     <p>across multiple lines</p>
     <div id="changetext">Some text that changes</div>
-     <div>Some text should be matched by xPath // selector</div>
-     <div>Some text should be matched by xPath selector</div>
-     <div>Some text should be matched by xPath1 selector</div>
     </body>
    <footer>
    <p>Footer</p>
@@ -97,16 +94,7 @@ def test_element_removal_output():
     </html>
    """
    html_blob = element_removal(
-      [
-        "header",
-        "footer",
-        "nav",
-        "#changetext",
-        "//*[contains(text(), 'xPath // selector')]",
-        "xpath://*[contains(text(), 'xPath selector')]",
-        "xpath1://*[contains(text(), 'xPath1 selector')]"
-      ],
-      html_content=content
+        ["header", "footer", "nav", "#changetext"], html_content=content
    )
    text = get_text(html_blob)
    assert (
@@ -3,7 +3,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from .util import live_server_setup, wait_for_all_checks
 import pytest


@@ -38,11 +38,6 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage):
    # Give the thread time to pick it up
    wait_for_all_checks(client)

-
-    # Content type recording worked
-    uuid = extract_UUID_from_client(client)
-    assert live_server.app.config['DATASTORE'].data['watching'][uuid]['content-type'] == "text/html"
-
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
@@ -115,9 +115,9 @@ def test_check_filter_multiline(client, live_server, measure_memory_usage):
    # Plaintext that doesnt look like a regex should match also
    assert b'and this should be' in res.data

-    assert b'Something' in res.data
-    assert b'across 6 billion multiple' in res.data
-    assert b'lines' in res.data
+    assert b'<div class="">Something' in res.data
+    assert b'<div class="">across 6 billion multiple' in res.data
+    assert b'<div class="">lines' in res.data

    # but the last one, which also says 'lines' shouldnt be here (non-greedy match checking)
    assert b'aaand something lines' not in res.data
@@ -183,19 +183,20 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
        follow_redirects=True
    )

-    assert b'1000 online' in res.data
+    # Class will be blank for now because the frontend didn't apply the diff
+    assert b'<div class="">1000 online' in res.data

    # All regex matching should be here
-    assert b'2000 online' in res.data
+    assert b'<div class="">2000 online' in res.data

    # Both regexs should be here
-    assert b'80 guests' in res.data
+    assert b'<div class="">80 guests' in res.data

    # Regex with flag handling should be here
-    assert b'SomeCase insensitive 3456' in res.data
+    assert b'<div class="">SomeCase insensitive 3456' in res.data

    # Singular group from /somecase insensitive (345\d)/i
-    assert b'3456' in res.data
+    assert b'<div class="">3456' in res.data

    # Regex with multiline flag handling should be here

@@ -79,14 +79,14 @@ def set_modified_ignore_response():
        f.write(test_return_data)


-# Ignore text now just removes it entirely, is a LOT more simpler code this way
-
 def test_check_ignore_text_functionality(client, live_server, measure_memory_usage):

    # Use a mix of case in ZzZ to prove it works case-insensitive.
    ignore_text = "XXXXX\r\nYYYYY\r\nzZzZZ\r\nnew ignore stuff"
    set_original_ignore_response()

+    # Give the endpoint time to spin up
+    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
@@ -151,10 +151,12 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

+    # Check the preview/highlighter: the ignored text should still be visible, but highlighted as ignored.
+    # We only introduce the "modified" content (which includes the ignored text) so we can prove that the
+    # newest version is also displayed at /preview.
    res = client.get(url_for("preview_page", uuid="first"))
-
-    # Should no longer be in the preview
-    assert b'new ignore stuff' not in res.data
+    # We should be able to see what we ignored
+    assert b'<div class="ignored">new ignore stuff' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
@@ -23,7 +23,7 @@ def set_original_ignore_response():
        f.write(test_return_data)


-def test_ignore(client, live_server, measure_memory_usage):
+def test_highlight_ignore(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
    set_original_ignore_response()
    test_url = url_for('test_endpoint', _external=True)
@@ -51,9 +51,9 @@ def test_ignore(client, live_server, measure_memory_usage):
    # Should return a link
    assert b'href' in res.data

-    # It should not be in the preview anymore
+    # And it should register in the preview page
    res = client.get(url_for("preview_page", uuid=uuid))
-    assert b'<div class="ignored">oh yeah 456' not in res.data
+    assert b'<div class="ignored">oh yeah 456' in res.data

    # Should be in base.html
    assert b'csrftoken' in res.data
@@ -499,7 +499,7 @@ def test_correct_header_detect(client, live_server, measure_memory_usage):
    )

    assert b'&#34;hello&#34;: 123,' in res.data
-    assert b'&#34;world&#34;: 123' in res.data
+    assert b'&#34;world&#34;: 123</div>' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from . util import live_server_setup


 def set_original_ignore_response():
@@ -59,9 +59,12 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):

    live_server_setup(live_server)

+    sleep_time_for_fetch_thread = 3
    trigger_text = "Add to cart"
    set_original_ignore_response()

+    # Give the endpoint time to spin up
+    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
@@ -86,14 +89,14 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):
    )
    assert b"Updated watch." in res.data

-    wait_for_all_checks(client)
    # Check it saved
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
    assert bytes(trigger_text.encode('utf-8')) in res.data

-
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)
    
    # so that we set the state to 'unviewed' after all the edits
    client.get(url_for("diff_history_page", uuid="first"))
@@ -101,7 +104,8 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):
    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

-    wait_for_all_checks(client)
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
@@ -113,17 +117,19 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data

    # Now set the content which contains the trigger text
+    time.sleep(sleep_time_for_fetch_thread)
    set_modified_with_trigger_text_response()

    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
+    time.sleep(sleep_time_for_fetch_thread)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
    
@@ -136,7 +142,4 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):
    res = client.get(url_for("preview_page", uuid="first"))

    # We should be able to see what we triggered on
-    # The JS highlighter should tell us which lines (also used in the live-preview)
-    assert b'const triggered_line_numbers = [6]' in res.data
-    assert b'Add to cart' in res.data
-
+    assert b'<div class="triggered">Add to cart' in res.data
@@ -11,8 +11,6 @@ def set_original_ignore_response():
     <p>Some initial text</p>
     <p>Which is across multiple lines</p>
     <p>So let's see what happens.</p>
-     <p>&nbsp;  So let's see what happens.   <br> </p>
-     <p>A - sortable line</p> 
     </body>
     </html>
    """
@@ -166,52 +164,5 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):
    assert res.data.find(b'A uppercase') < res.data.find(b'Z last')
    assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines')
    
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
-
-
-def test_extra_filters(client, live_server, measure_memory_usage):
-    #live_server_setup(live_server)
-
-    set_original_ignore_response()
-
-    # Add our URL to the import page
-    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
-    wait_for_all_checks(client)
-
-    # Add our URL to the import page
-    res = client.post(
-        url_for("edit_page", uuid="first"),
-        data={"remove_duplicate_lines": "y",
-              "trim_text_whitespace": "y",
-              "sort_text_alphabetically": "",  # leave this OFF for testing
-              "url": test_url,
-              "fetch_backend": "html_requests"},
-        follow_redirects=True
-    )
-    assert b"Updated watch." in res.data
-    # Give the thread time to pick it up
-    wait_for_all_checks(client)
-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
-    # Give the thread time to pick it up
-    wait_for_all_checks(client)
-
-    res = client.get(
-        url_for("preview_page", uuid="first")
-    )
-
-    assert res.data.count(b"see what happens.") == 1
-
-    # still should remain unsorted ('A - sortable line') stays at the end
-    assert res.data.find(b'A - sortable line') > res.data.find(b'Which is across multiple lines')
-
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
@@ -161,8 +161,8 @@ def test_check_xpath_text_function_utf8(client, live_server, measure_memory_usag
        follow_redirects=True
    )

-    assert b'Stock Alert (UK): RPi CM4' in res.data
-    assert b'Stock Alert (UK): Big monitor' in res.data
+    assert b'<div class="">Stock Alert (UK): RPi CM4' in res.data
+    assert b'<div class="">Stock Alert (UK): Big monitor' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
@@ -278,7 +278,7 @@ class update_worker(threading.Thread):

                        update_handler.call_browser()

-                        changed_detected, update_obj, contents = update_handler.run_changedetection(
+                        changed_detected, update_obj, contents, content_after_filters = update_handler.run_changedetection(
                            watch=watch,
                            skip_when_checksum_same=skip_when_same_checksum,
                        )
@@ -338,8 +338,7 @@ class update_worker(threading.Thread):
                        elif e.status_code == 500:
                            err_text = "Error - 500 (Internal server error) received from the web site"
                        else:
-                            extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
-                            err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"
+                            err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))

                        if e.screenshot:
                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
@@ -492,8 +491,6 @@ class update_worker(threading.Thread):
                        if not self.datastore.data['watching'].get(uuid):
                            continue

-                        update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
-
                        # Mark that we never had any failures
                        if not watch.get('ignore_status_codes'):
                            update_obj['consecutive_filter_failures'] = 0
@@ -58,10 +58,6 @@ services:
  #
  #        Absolute minimum seconds to recheck, overrides any watch minimum, change to 0 to disable
  #      - MINIMUM_SECONDS_RECHECK_TIME=3
-  #
-  #        If you want to watch local files file:///path/to/file.txt (careful! security implications!)
-  #      - ALLOW_FILE_URI=False
-  
      # Comment out ports: when using behind a reverse proxy, enable networks: etc.
      ports:
        - 5000:5000
@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
 # jq not available on Windows so must be installed manually

 # Notification library
-apprise==1.9.0
+apprise~=1.8.1

 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 # and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible
@@ -93,5 +93,3 @@ babel

 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
 greenlet >= 3.0.3
-
-
Author	SHA1	Message	Date
dgtlmoon	515b1bc87f	Merge branch 'master' into extra-filters	2024-09-17 18:32:14 +02:00
dgtlmoon	ea87b301d8	Merge branch 'master' into extra-filters	2024-09-11 11:32:12 +02:00
dgtlmoon	5108201f0b	Filters should apply at the end of the chain	2024-09-10 14:28:58 +02:00
dgtlmoon	7289e4e193	Fix bad example	2024-09-10 12:32:07 +02:00
dgtlmoon	34e684eb37	rearrange	2024-09-09 22:19:50 +02:00
dgtlmoon	f032a1b1b3	Adding form.remove_duplicate_lines	2024-09-09 22:16:16 +02:00
dgtlmoon	0506c01c07	Added form.trim_text_whitespace	2024-09-09 22:10:36 +02:00
dgtlmoon	09aae40c4a	tweak style	2024-09-09 22:03:59 +02:00
dgtlmoon	9270d4053b	smarter check?	2024-09-09 20:34:43 +02:00
dgtlmoon	160c267e9f	add elay	2024-09-09 18:09:03 +02:00
dgtlmoon	97f47e7b3b	Merge branch 'master' into text-filter-preview	2024-09-09 17:28:58 +02:00
dgtlmoon	7a496e3e15	tweak error messages	2024-09-09 11:35:28 +02:00
dgtlmoon	2c564d5c3f	Abort existing requests so it doesnt train-wreck	2024-09-07 15:47:54 +02:00
dgtlmoon	59b8971a96	test and label tweaks	2024-09-07 15:42:58 +02:00
dgtlmoon	801791f904	live preview of text filters	2024-09-06 22:53:28 +02:00