bump text

Also support RDF
format tweak
2025-10-30 22:27:52 +00:00 · 2025-10-10 17:36:28 +02:00 · 2025-10-10 17:34:22 +02:00 · 2025-10-10 17:29:20 +02:00 · 2025-10-10 17:26:20 +02:00 · 2025-10-10 17:11:59 +02:00
68 changed files with 1550 additions and 1187 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -11,6 +11,4 @@ updates:
  - package-ecosystem: pip
    directory: /
    schedule:
-      interval: "daily"
-    allow:
-      - dependency-name: "apprise"
+      interval: "weekly"
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -34,7 +34,7 @@ jobs:

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
-      uses: github/codeql-action/init@v3
+      uses: github/codeql-action/init@v4
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
-      uses: github/codeql-action/autobuild@v3
+      uses: github/codeql-action/autobuild@v4

    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
    #   make release

    - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v3
+      uses: github/codeql-action/analyze@v4
--- a/.github/workflows/test-container-build.yml
+++ b/.github/workflows/test-container-build.yml
@@ -74,5 +74,5 @@ jobs:
            file: ${{ matrix.dockerfile }}
            platforms: ${{ matrix.platform }}
            cache-from: type=gha
-            cache-to: type=gha,mode=max
+            cache-to: type=gha,mode=min

--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.50.14'
+__version__ = '0.50.20'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/api/init.py
+++ b/changedetectionio/api/init.py
@@ -1,10 +1,7 @@
 import copy
-import yaml
 import functools
 from flask import request, abort
 from loguru import logger
-from openapi_core import OpenAPI
-from openapi_core.contrib.flask import FlaskOpenAPIRequest
 from . import api_schema
 from ..model import watch_base

@@ -34,7 +31,11 @@ schema_delete_notification_urls['required'] = ['notification_urls']

@functools.cache
 def get_openapi_spec():
+    """Lazy load OpenAPI spec and dependencies only when validation is needed."""
    import os
+    import yaml  # Lazy import - only loaded when API validation is actually used
+    from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup
+
    spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
    with open(spec_path, 'r') as f:
        spec_dict = yaml.safe_load(f)
@@ -49,6 +50,9 @@ def validate_openapi_request(operation_id):
            try:
                # Skip OpenAPI validation for GET requests since they don't have request bodies
                if request.method.upper() != 'GET':
+                    # Lazy import - only loaded when actually validating a request
+                    from openapi_core.contrib.flask import FlaskOpenAPIRequest
+
                    spec = get_openapi_spec()
                    openapi_request = FlaskOpenAPIRequest(request)
                    result = spec.unmarshal_request(openapi_request)
--- a/changedetectionio/blueprint/settings/templates/settings.html
+++ b/changedetectionio/blueprint/settings/templates/settings.html
@@ -72,17 +72,24 @@
                        <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
                        </span>
                    </div>
-                    <div class="pure-control-group">
-                        {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
-                    </div>
-                    <div class="pure-control-group">
-                        {{ render_field(form.application.form.rss_content_format) }}
-                        <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
-                    </div>
                    <div class="pure-control-group">
                        {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
                        <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
                    </div>
+                    <div class="grey-form-border">
+                        <div class="pure-control-group">
+                            {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
+                        </div>
+                        <div class="pure-control-group">
+                            {{ render_field(form.application.form.rss_content_format) }}
+                            <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
+                        </div>
+                        <div class="pure-control-group">
+                            {{ render_checkbox_field(form.application.form.rss_reader_mode) }}
+                            <span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
+                        </div>
+                    </div>
+
                {% if form.requests.proxy %}
                    <div class="pure-control-group inline-radio">
                        {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -940,6 +940,10 @@ class globalSettingsApplicationForm(commonSettingsForm):
    strip_ignored_lines = BooleanField('Strip ignored lines')
    rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
                                      validators=[validators.Optional()])
+
+    rss_reader_mode = BooleanField('RSS reader mode ', default=False,
+                                      validators=[validators.Optional()])
+
    filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
                                                                  render_kw={"style": "width: 5em;"},
                                                                  validators=[validators.NumberRange(min=0,
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -1,5 +1,4 @@
 from loguru import logger
-from lxml import etree
 from typing import List
 import html
 import json
@@ -58,13 +57,17 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting

    return html_block

-def subtractive_css_selector(css_selector, html_content):
+def subtractive_css_selector(css_selector, content):
    from bs4 import BeautifulSoup
-    soup = BeautifulSoup(html_content, "html.parser")
+    soup = BeautifulSoup(content, "html.parser")

    # So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM
    elements_to_remove = soup.select(css_selector)

+    if not elements_to_remove:
+        # Better to return the original than rebuild with BeautifulSoup
+        return content
+
    # Then, remove them in a separate loop
    for item in elements_to_remove:
        item.decompose()
@@ -72,6 +75,7 @@ def subtractive_css_selector(css_selector, html_content):
    return str(soup)

 def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
+    from lxml import etree
    # Parse the HTML content using lxml
    html_tree = etree.HTML(html_content)

@@ -83,6 +87,10 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
        # Collect elements for each selector
        elements_to_remove.extend(html_tree.xpath(selector))

+    # If no elements were found, return the original HTML content
+    if not elements_to_remove:
+        return html_content
+
    # Then, remove them in a separate loop
    for element in elements_to_remove:
        if element.getparent() is not None:  # Ensure the element has a parent before removing
@@ -100,7 +108,7 @@ def element_removal(selectors: List[str], html_content):
    xpath_selectors = []

    for selector in selectors:
-        if selector.startswith(('xpath:', 'xpath1:', '//')):
+        if selector.strip().startswith(('xpath:', 'xpath1:', '//')):
            # Handle XPath selectors separately
            xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
            xpath_selectors.append(xpath_selector)
@@ -295,70 +303,92 @@ def _get_stripped_text_from_json_match(match):

    return stripped_text_from_html

+def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter):
+    from bs4 import BeautifulSoup
+    stripped_text_from_html = ''
+
+    # Scan each <script> blob (only ld+json ones when ensure_is_ldjson_info_type is set) and return the first that matches json_filter
+    # As a last resort, try to parse the whole <body>; raises JSONNotFound when no candidate parses as JSON
+    soup = BeautifulSoup(content, 'html.parser')
+
+    if ensure_is_ldjson_info_type:
+        bs_result = soup.find_all('script', {"type": "application/ld+json"})
+    else:
+        bs_result = soup.find_all('script')
+    bs_result += soup.find_all('body')
+
+    bs_jsons = []
+
+    for result in bs_result:
+        # result.text is how bs4 magically strips JSON from the body
+        content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else ''
+        # Skip empty/whitespace-only tags (startswith is safe on ''), and things that dont even look like JSON
+        if not content_start.startswith(('{', '[')):
+            continue
+        try:
+            json_data = json.loads(result.text.lstrip("\ufeff"))
+            bs_jsons.append(json_data)
+        except json.JSONDecodeError:
+            # Skip objects which cannot be parsed
+            continue
+
+    if not bs_jsons:
+        raise JSONNotFound("No parsable JSON found in this document")
+
+    for json_data in bs_jsons:
+        stripped_text_from_html = _parse_json(json_data, json_filter)
+
+        if ensure_is_ldjson_info_type:
+            # Could sometimes be list, string or something else random
+            if isinstance(json_data, dict):
+                # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
+                # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
+                # @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
+                # LD_JSON auto-extract also requires some content PLUS the ldjson to be present
+                # 1833 - could be either str or dict, should not be anything else
+
+                t = json_data.get('@type')
+                if t and stripped_text_from_html:
+
+                    if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
+                        break
+                    # The non-standard part, some have a list
+                    elif isinstance(t, list):
+                        if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
+                            break
+
+        elif stripped_text_from_html:
+            break
+
+    return stripped_text_from_html
+
 # content - json
 # json_filter - ie json:$..price
 # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
 def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
-    from bs4 import BeautifulSoup

    stripped_text_from_html = False
 # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
    # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
-    try:
-        # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
-        stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
-    except json.JSONDecodeError as e:
-        logger.warning(str(e))

-        # Foreach <script json></script> blob.. just return the first that matches json_filter
-        # As a last resort, try to parse the whole <body>
-        soup = BeautifulSoup(content, 'html.parser')
+    # Looks like clean JSON, dont bother extracting from HTML

-        if ensure_is_ldjson_info_type:
-            bs_result = soup.find_all('script', {"type": "application/ld+json"})
-        else:
-            bs_result = soup.find_all('script')
-        bs_result += soup.find_all('body')
+    content_start = content.lstrip("\ufeff").strip()[:100]

-        bs_jsons = []
-        for result in bs_result:
-            # Skip empty tags, and things that dont even look like JSON
-            if not result.text or '{' not in result.text:
-                continue
-            try:
-                json_data = json.loads(result.text)
-                bs_jsons.append(json_data)
-            except json.JSONDecodeError:
-                # Skip objects which cannot be parsed
-                continue
-
-        if not bs_jsons:
-            raise JSONNotFound("No parsable JSON found in this document")
-        
-        for json_data in bs_jsons:
-            stripped_text_from_html = _parse_json(json_data, json_filter)
-
-            if ensure_is_ldjson_info_type:
-                # Could sometimes be list, string or something else random
-                if isinstance(json_data, dict):
-                    # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
-                    # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
-                    # @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
-                    # LD_JSON auto-extract also requires some content PLUS the ldjson to be present
-                    # 1833 - could be either str or dict, should not be anything else
-
-                    t = json_data.get('@type')
-                    if t and stripped_text_from_html:
-
-                        if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
-                            break
-                        # The non-standard part, some have a list
-                        elif isinstance(t, list):
-                            if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
-                                break
-
-            elif stripped_text_from_html:
-                break
+    if content_start.startswith(('{', '[')):
+        try:
+            # .lstrip("\ufeff") strips the ByteOrderMark from UTF8 and still lets the UTF work
+            stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter)
+        except json.JSONDecodeError as e:
+            logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
+    else:
+        # Empty content or probably something else (startswith is safe on ''), go fish inside for it
+        try:
+            stripped_text_from_html = extract_json_blob_from_html(content=content,
+                                                                  ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
+                                                                  json_filter=json_filter)
+        except json.JSONDecodeError as e:
+            logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")

    if not stripped_text_from_html:
        # Re 265 - Just return an empty string when filter not found
--- a/changedetectionio/model/App.py
+++ b/changedetectionio/model/App.py
@@ -55,6 +55,7 @@ class model(dict):
                    'rss_access_token': None,
                    'rss_content_format': RSS_FORMAT_TYPES[0][0],
                    'rss_hide_muted_watches': True,
+                    'rss_reader_mode': False,
                    'schema_version' : 0,
                    'shared_diff_access': False,
                    'strip_ignored_lines': False,
--- a/changedetectionio/processors/magic.py
+++ b/changedetectionio/processors/magic.py
@@ -0,0 +1,126 @@
+"""
+Content Type Detection and Stream Classification
+
+This module provides intelligent content-type detection for changedetection.io.
+It addresses the common problem where HTTP Content-Type headers are missing, incorrect,
+or too generic, which would otherwise cause the wrong processor to be used.
+
+The guess_stream_type class combines:
+1. HTTP Content-Type headers (when available and reliable)
+2. Python-magic library for MIME detection (analyzing actual file content)
+3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.)
+
+This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF,
+plain text, CSV, YAML, and XML formats - even when servers provide misleading headers.
+
+Used by: processors/text_json_diff/processor.py and other content processors
+"""
+
+# When to apply the 'cdata to real HTML' hack
+RSS_XML_CONTENT_TYPES = [
+    "application/rss+xml",
+    "application/rdf+xml",
+    "application/atom+xml",
+    "text/rss+xml",  # rare, non-standard
+    "application/x-rss+xml",  # legacy (older feed software)
+    "application/x-atom+xml",  # legacy (older Atom)
+]
+
+# JSON Content-types
+JSON_CONTENT_TYPES = [
+    "application/activity+json",
+    "application/feed+json",
+    "application/json",
+    "application/ld+json",
+    "application/vnd.api+json",
+]
+
+
+# Generic XML Content-types (non-RSS/Atom)
+XML_CONTENT_TYPES = [
+    "text/xml",
+    "application/xml",
+]
+
+HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']
+
+from loguru import logger
+
+class guess_stream_type():
+    is_pdf = False
+    is_json = False
+    is_html = False
+    is_plaintext = False
+    is_rss = False
+    is_csv = False
+    is_xml = False  # Generic XML, not RSS/Atom
+    is_yaml = False
+
+    def __init__(self, http_content_header, content):
+        import re
+        magic_content_header = http_content_header
+        test_content = content[:200].lower().strip()
+
+        # Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.)
+        test_content_normalized = re.sub(r'<\s+', '<', test_content)
+
+        # Magic will sometimes call text/plain as text/html!
+        magic_result = None
+        try:
+            import magic
+
+            mime = magic.from_buffer(content[:200], mime=True) # Send the original content
+            logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
+            if mime and "/" in mime:
+                magic_result = mime
+                # Ignore generic/fallback mime types from magic
+                if mime in ['application/octet-stream', 'application/x-empty', 'binary']:
+                    logger.debug(f"Ignoring generic mime type '{mime}' from magic library")
+                # Trust magic for non-text types immediately
+                elif mime not in ['text/html', 'text/plain']:
+                    magic_content_header = mime
+
+        except Exception as e:
+            logger.error(f"Error getting a more precise mime type from 'magic' library ({str(e)}), using content-based detection")
+
+        # Content-based detection (most reliable for text formats)
+        # Check for HTML patterns first - if found, override magic's text/plain
+        has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)
+
+        # Always trust headers first
+        if 'text/plain' in http_content_header:
+            self.is_plaintext = True
+        if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
+            self.is_rss = True
+        elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
+            self.is_json = True
+        elif 'pdf' in magic_content_header:
+            self.is_pdf = True
+        elif has_html_patterns or http_content_header == 'text/html':
+            self.is_html = True
+        elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
+            self.is_json = True
+        # magic will call a rss document 'xml'
+        # Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
+        # This also triggers the automatic CDATA text parser so the RSS comes back as a nice content list
+        elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
+            self.is_rss = True
+        elif any(s in http_content_header for s in XML_CONTENT_TYPES):
+            # Only mark as generic XML if not already detected as RSS
+            if not self.is_rss:
+                self.is_xml = True
+        elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
+            # Generic XML that's not RSS/Atom (RSS/Atom checked above)
+            self.is_xml = True
+        elif '%pdf-1' in test_content:
+            self.is_pdf = True
+        elif http_content_header.startswith('text/'):
+            self.is_plaintext = True
+        # Only trust magic for 'text' if no other patterns matched
+        elif 'text' in magic_content_header:
+            self.is_plaintext = True
+        # If magic says text/plain and we found no HTML patterns, trust it
+        elif magic_result == 'text/plain':
+            self.is_plaintext = True
+            logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")
+
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -13,12 +13,17 @@ from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from loguru import logger

+from changedetectionio.processors.magic import guess_stream_type
+
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

 name = 'Webpage Text/HTML, JSON and PDF changes'
 description = 'Detects all text changes where possible'

-json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
+JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:']
+
+# Assume it's this type if the server says nothing on content-type
+DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html'

 class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg, screenshot=None, xpath_data=None):
@@ -32,372 +37,546 @@ class PDFToHTMLToolNotFound(ValueError):
        ValueError.__init__(self, msg)


+class FilterConfig:
+    """Consolidates all filter and rule configurations from watch, tags, and global settings."""
+
+    def __init__(self, watch, datastore):
+        self.watch = watch
+        self.datastore = datastore
+        self.watch_uuid = watch.get('uuid')
+        # Cache computed properties to avoid repeated list operations
+        self._include_filters_cache = None
+        self._subtractive_selectors_cache = None
+
+    def _get_merged_rules(self, attr, include_global=False):
+        """Merge rules from watch, tags, and optionally global settings."""
+        watch_rules = self.watch.get(attr, [])
+        tag_rules = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr)
+        rules = list(dict.fromkeys(watch_rules + tag_rules))
+
+        if include_global:
+            global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', [])
+            rules = list(dict.fromkeys(rules + global_rules))
+
+        return rules
+
+    @property
+    def include_filters(self):
+        if self._include_filters_cache is None:
+            filters = self._get_merged_rules('include_filters')
+            # Inject LD+JSON price tracker rule if enabled
+            if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
+                filters += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
+            self._include_filters_cache = filters
+        return self._include_filters_cache
+
+    @property
+    def subtractive_selectors(self):
+        if self._subtractive_selectors_cache is None:
+            watch_selectors = self.watch.get("subtractive_selectors", [])
+            tag_selectors = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors')
+            global_selectors = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
+            self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
+        return self._subtractive_selectors_cache
+
+    @property
+    def extract_text(self):
+        return self._get_merged_rules('extract_text')
+
+    @property
+    def ignore_text(self):
+        return self._get_merged_rules('ignore_text', include_global=True)
+
+    @property
+    def trigger_text(self):
+        return self._get_merged_rules('trigger_text')
+
+    @property
+    def text_should_not_be_present(self):
+        return self._get_merged_rules('text_should_not_be_present')
+
+    @property
+    def has_include_filters(self):
+        return bool(self.include_filters) and bool(self.include_filters[0].strip())
+
+    @property
+    def has_include_json_filters(self):
+        return any(f.strip().startswith(prefix) for f in self.include_filters for prefix in JSON_FILTER_PREFIXES)
+
+    @property
+    def has_subtractive_selectors(self):
+        return bool(self.subtractive_selectors) and bool(self.subtractive_selectors[0].strip())
+
+
+class ContentTransformer:
+    """Handles text transformations like trimming, sorting, and deduplication."""
+
+    @staticmethod
+    def trim_whitespace(text):
+        """Remove leading/trailing whitespace from each line."""
+        # Use generator expression to avoid building intermediate list
+        return '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines())
+
+    @staticmethod
+    def remove_duplicate_lines(text):
+        """Remove duplicate lines while preserving order."""
+        return '\n'.join(dict.fromkeys(line for line in text.replace("\n\n", "\n").splitlines()))
+
+    @staticmethod
+    def sort_alphabetically(text):
+        """Sort lines alphabetically (case-insensitive)."""
+        # Remove double line feeds before sorting
+        text = text.replace("\n\n", "\n")
+        return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
+
+    @staticmethod
+    def extract_by_regex(text, regex_patterns):
+        """Extract text matching regex patterns."""
+        # Use list of strings instead of concatenating lists repeatedly (avoids O(n²) behavior)
+        regex_matched_output = []
+
+        for s_re in regex_patterns:
+            # Check if it's perl-style regex /.../
+            if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
+                regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
+                result = re.findall(regex, text)
+
+                for match in result:
+                    if type(match) is tuple:
+                        regex_matched_output.extend(match)
+                        regex_matched_output.append('\n')
+                    else:
+                        regex_matched_output.append(match)
+                        regex_matched_output.append('\n')
+            else:
+                # Plain text search (case-insensitive)
+                r = re.compile(re.escape(s_re), re.IGNORECASE)
+                res = r.findall(text)
+                if res:
+                    for match in res:
+                        regex_matched_output.append(match)
+                        regex_matched_output.append('\n')
+
+        return ''.join(regex_matched_output) if regex_matched_output else ''
+
+
+class RuleEngine:
+    """Evaluates blocking rules (triggers, conditions, text_should_not_be_present)."""
+
+    @staticmethod
+    def evaluate_trigger_text(content, trigger_patterns):
+        """
+        Check if trigger text is present. If trigger_text is configured,
+        content is blocked UNLESS the trigger is found.
+        Returns True if blocked, False if allowed.
+        """
+        if not trigger_patterns:
+            return False
+
+        # Assume blocked if trigger_text is configured
+        result = html_tools.strip_ignore_text(
+            content=str(content),
+            wordlist=trigger_patterns,
+            mode="line numbers"
+        )
+        # Unblock if trigger was found
+        return not bool(result)
+
+    @staticmethod
+    def evaluate_text_should_not_be_present(content, patterns):
+        """
+        Check if forbidden text is present. If found, block the change.
+        Returns True if blocked, False if allowed.
+        """
+        if not patterns:
+            return False
+
+        result = html_tools.strip_ignore_text(
+            content=str(content),
+            wordlist=patterns,
+            mode="line numbers"
+        )
+        # Block if forbidden text was found
+        return bool(result)
+
+    @staticmethod
+    def evaluate_conditions(watch, datastore, content):
+        """
+        Evaluate custom conditions ruleset.
+        Returns True if blocked, False if allowed.
+        """
+        if not watch.get('conditions') or not watch.get('conditions_match_logic'):
+            return False
+
+        conditions_result = execute_ruleset_against_all_plugins(
+            current_watch_uuid=watch.get('uuid'),
+            application_datastruct=datastore.data,
+            ephemeral_data={'text': content}
+        )
+
+        # Block if conditions not met
+        return not conditions_result.get('result')
+
+
+class ContentProcessor:
+    """Handles content preprocessing, filtering, and extraction."""
+
+    def __init__(self, fetcher, watch, filter_config, datastore):
+        self.fetcher = fetcher
+        self.watch = watch
+        self.filter_config = filter_config
+        self.datastore = datastore
+
+    def preprocess_rss(self, content):
+        """
+        Convert CDATA/comments in RSS to usable text.
+
+        Mode is chosen by the boolean application setting 'rss_reader_mode':
+        - unset/False (default): Inline CDATA replacement (original behavior)
+        - True (formatted): Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked)
+        """
+        from changedetectionio import rss_tools
+        rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode")
+        if rss_mode:
+            # Format RSS items nicely with CDATA content unmarked and converted to text
+            return rss_tools.format_rss_items(content)
+        else:
+            # Default: Original inline CDATA replacement
+            return cdata_in_document_to_text(html_content=content)
+
+    def preprocess_pdf(self, raw_content):
+        """Convert PDF bytes to HTML with the external PDF_TO_HTML_TOOL (default `pdftohtml`) and append a checksum/size note."""
+        from shutil import which
+        tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
+        if not which(tool):
+            raise PDFToHTMLToolNotFound(
+                f"Command-line `{tool}` tool was not found in system PATH, was it installed?"
+            )
+
+        import subprocess
+        # run() feeds stdin and drains stdout together, so a full pipe buffer cannot deadlock;
+        # on timeout it kills the child and re-raises subprocess.TimeoutExpired
+        proc = subprocess.run(
+            [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
+            input=raw_content,
+            stdout=subprocess.PIPE,
+            timeout=60
+        )
+        html_content = proc.stdout.decode('utf-8')
+
+        # Add metadata for change detection
+        metadata = (
+            f"<p>Added by changedetection.io: Document checksum - "
+            f"{hashlib.md5(raw_content).hexdigest().upper()} "
+            f"Original file size - {len(raw_content)} bytes</p>"
+        )
+        return html_content.replace('</body>', metadata + '</body>')
+
+    def preprocess_json(self, raw_content):
+        """Format and sort JSON content."""
+        # Then we re-format it, else it does have filters (later on) which will reformat it anyway
+        content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$")
+
+        # Sort JSON to avoid false alerts from reordering
+        try:
+            content = json.dumps(json.loads(content), sort_keys=True, indent=4)
+        except Exception:
+            # Might be malformed JSON, continue anyway
+            pass
+
+        return content
+
+    def apply_include_filters(self, content, stream_content_type):
+        """Apply CSS, XPath, or JSON filters to extract specific content."""
+        filtered_content = ""
+
+        for filter_rule in self.filter_config.include_filters:
+            # XPath filters
+            if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
+                filtered_content += html_tools.xpath_filter(
+                    xpath_filter=filter_rule.replace('xpath:', ''),
+                    html_content=content,
+                    append_pretty_line_formatting=not self.watch.is_source_type_url,
+                    is_rss=stream_content_type.is_rss
+                )
+
+            # XPath1 filters (first match only)
+            elif filter_rule.startswith('xpath1:'):
+                filtered_content += html_tools.xpath1_filter(
+                    xpath_filter=filter_rule.replace('xpath1:', ''),
+                    html_content=content,
+                    append_pretty_line_formatting=not self.watch.is_source_type_url,
+                    is_rss=stream_content_type.is_rss
+                )
+
+            # JSON filters
+            elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES):
+                filtered_content += html_tools.extract_json_as_string(
+                    content=content,
+                    json_filter=filter_rule
+                )
+
+            # CSS selectors, default fallback
+            else:
+                filtered_content += html_tools.include_filters(
+                    include_filters=filter_rule,
+                    html_content=content,
+                    append_pretty_line_formatting=not self.watch.is_source_type_url
+                )
+
+            # Raise error if filter returned nothing
+            if not filtered_content.strip():
+                raise FilterNotFoundInResponse(
+                    msg=self.filter_config.include_filters,
+                    screenshot=self.fetcher.screenshot,
+                    xpath_data=self.fetcher.xpath_data
+                )
+
+        return filtered_content
+
+    def apply_subtractive_selectors(self, content):
+        """Remove elements matching subtractive selectors."""
+        return html_tools.element_removal(self.filter_config.subtractive_selectors, content)
+
+    def extract_text_from_html(self, html_content, stream_content_type):
+        """Convert HTML to plain text."""
+        do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
+        return html_tools.html_to_text(
+            html_content=html_content,
+            render_anchor_tag_content=do_anchor,
+            is_rss=stream_content_type.is_rss
+        )
+
+
+class ChecksumCalculator:
+    """MD5 checksum helper for processed text."""
+
+    @staticmethod
+    def calculate(text, ignore_whitespace=False):
+        """Return the hex MD5 digest of ``text`` encoded as UTF-8.
+
+        When ``ignore_whitespace`` is true, ``text`` is first passed through
+        ``str.translate`` with ``TRANSLATE_WHITESPACE_TABLE``.
+        """
+        normalised = text.translate(TRANSLATE_WHITESPACE_TABLE) if ignore_whitespace else text
+        digest = hashlib.md5(normalised.encode('utf-8'))
+        return digest.hexdigest()
+
+
 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

    def run_changedetection(self, watch):
        changed_detected = False
-        html_content = ""
-        screenshot = False  # as bytes
-        stripped_text_from_html = ""

        if not watch:
            raise Exception("Watch no longer exists.")

+        # Initialize components
+        filter_config = FilterConfig(watch, self.datastore)
+        content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
+        transformer = ContentTransformer()
+        rule_engine = RuleEngine()
+
+        # Get content type and stream info
+        ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower()
+        stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content)
+
        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}
-
        url = watch.link

        self.screenshot = self.fetcher.screenshot
        self.xpath_data = self.fetcher.xpath_data

-        # Track the content type
-        update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower()
-
-        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
-        # Saves a lot of CPU
+        # Track the content type and checksum before filters
+        update_obj['content_type'] = ctype_header
        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()

-        # Fetching complete, now filters
+        # === CONTENT PREPROCESSING ===
+        # Avoid creating unnecessary intermediate string copies by reassigning only when needed
+        content = self.fetcher.content

-        # @note: I feel like the following should be in a more obvious chain system
-        #  - Check filter text
-        #  - Is the checksum different?
-        #  - Do we convert to JSON?
-        # https://stackoverflow.com/questions/41817578/basic-method-chaining ?
-        # return content().textfilter().jsonextract().checksumcompare() ?
+        # RSS preprocessing
+        if stream_content_type.is_rss:
+            content = content_processor.preprocess_rss(content)
+            if self.datastore.data["settings"]["application"].get("rss_reader_mode"):
+                # Now just becomes regular HTML that can have xpath/CSS applied (first of the set etc)
+                stream_content_type.is_rss = False
+                stream_content_type.is_html = True
+                self.fetcher.content = content

-        is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower()
-        is_html = not is_json
-        is_rss = False
+        # PDF preprocessing
+        if watch.is_pdf or stream_content_type.is_pdf:
+            content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content)
+            stream_content_type.is_html = True

-        ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
-        # Go into RSS preprocess for converting CDATA/comment to usable text
-        if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
-            if '<rss' in self.fetcher.content[:100].lower():
-                self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
-                is_rss = True
+        # JSON - Always reformat it nicely for consistency.

-        # source: support, basically treat it as plaintext
+        if stream_content_type.is_json:
+            if not filter_config.has_include_json_filters:
+                content = content_processor.preprocess_json(raw_content=content)
+        # Otherwise (JSON include filters are set) it gets sorted/formatted in the filter stage anyway
+
+        # HTML obfuscation workarounds
+        if stream_content_type.is_html:
+            content = html_tools.workarounds_for_obfuscations(content)
+
+        # Check for LD+JSON price data (for HTML content)
+        if stream_content_type.is_html:
+            update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content)
+
+        # === FILTER APPLICATION ===
+        # Start with content reference, avoid copy until modification
+        html_content = content
+
+        # Apply include filters (CSS, XPath, JSON)
+        # Except for plaintext (in case they tried to confuse the system, it will HTML escape)
+        #if not stream_content_type.is_plaintext:
+        if filter_config.has_include_filters:
+            html_content = content_processor.apply_include_filters(content, stream_content_type)
+
+        # Apply subtractive selectors
+        if filter_config.has_subtractive_selectors:
+            html_content = content_processor.apply_subtractive_selectors(html_content)
+
+        # === TEXT EXTRACTION ===
        if watch.is_source_type_url:
-            is_html = False
-            is_json = False
-
-        inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10]
-        if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
-            from shutil import which
-            tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
-            if not which(tool):
-                raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool))
-
-            import subprocess
-            proc = subprocess.Popen(
-                [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
-                stdout=subprocess.PIPE,
-                stdin=subprocess.PIPE)
-            proc.stdin.write(self.fetcher.raw_content)
-            proc.stdin.close()
-            self.fetcher.content = proc.stdout.read().decode('utf-8')
-            proc.wait(timeout=60)
-
-            # Add a little metadata so we know if the file changes (like if an image changes, but the text is the same
-            # @todo may cause problems with non-UTF8?
-            metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format(
-                hashlib.md5(self.fetcher.raw_content).hexdigest().upper(),
-                len(self.fetcher.content))
-
-            self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>')
-
-        # Better would be if Watch.model could access the global data also
-        # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
-        # https://realpython.com/inherit-python-dict/ instead of doing it procedurely
-        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
-
-        # 1845 - remove duplicated filters in both group and watch include filter
-        include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
-
-        subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
-                                 *watch.get("subtractive_selectors", []),
-                                 *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
-                                 ]
-
-        # Inject a virtual LD+JSON price tracker rule
-        if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
-            include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
-
-        has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
-        has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())
-
-        if is_json and not has_filter_rule:
-            include_filters_rule.append("json:$")
-            has_filter_rule = True
-
-        if is_json:
-            # Sort the JSON so we dont get false alerts when the content is just re-ordered
-            try:
-                self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True)
-            except Exception as e:
-                # Might have just been a snippet, or otherwise bad JSON, continue
-                pass
-
-        if has_filter_rule:
-            for filter in include_filters_rule:
-                if any(prefix in filter for prefix in json_filter_prefixes):
-                    stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter)
-                    is_html = False
-
-        if is_html or watch.is_source_type_url:
-
-            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
-            self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
-            html_content = self.fetcher.content
-            content_type = self.fetcher.get_all_headers().get('content-type', '').lower()
-            is_attachment = 'attachment' in self.fetcher.get_all_headers().get('content-disposition', '').lower()
-
-            # Try to detect better mime types if its a download or not announced as HTML
-            if is_attachment or 'octet-stream' in content_type or not 'html' in content_type:
-                logger.debug(f"Got a reply that may be a download or possibly a text attachment, checking..")
-                try:
-                    import magic
-                    mime = magic.from_buffer(html_content, mime=True)
-                    logger.debug(f"Guessing mime type, original content_type '{content_type}', mime type detected '{mime}'")
-                    if mime and "/" in mime: # looks valid and is a valid mime type
-                        content_type = mime
-                except Exception as e:
-                    logger.error(f"Error getting a more precise mime type from 'magic' library ({str(e)}")
-
-            if 'text/' in content_type and not 'html' in content_type:
-                # Don't run get_text or xpath/css filters on plaintext
-                stripped_text_from_html = html_content
+            # For source URLs, keep raw content
+            stripped_text = html_content
+        elif stream_content_type.is_plaintext:
+            # For plaintext, keep as-is without HTML-to-text conversion
+            stripped_text = html_content
+        else:
+            # Extract text from HTML/RSS content (not generic XML)
+            if stream_content_type.is_html or stream_content_type.is_rss:
+                stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type)
            else:
-                # If not JSON, and if it's not text/plain..
-                # Does it have some ld+json price data? used for easier monitoring
-                update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content)
-
-                # Then we assume HTML
-                if has_filter_rule:
-                    html_content = ""
-
-                    for filter_rule in include_filters_rule:
-                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
-                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
-                            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
-                                                                    html_content=self.fetcher.content,
-                                                                    append_pretty_line_formatting=not watch.is_source_type_url,
-                                                                    is_rss=is_rss)
-
-                        elif filter_rule.startswith('xpath1:'):
-                            html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
-                                                                     html_content=self.fetcher.content,
-                                                                     append_pretty_line_formatting=not watch.is_source_type_url,
-                                                                     is_rss=is_rss)
-                        else:
-                            html_content += html_tools.include_filters(include_filters=filter_rule,
-                                                                       html_content=self.fetcher.content,
-                                                                       append_pretty_line_formatting=not watch.is_source_type_url)
-
-                    if not html_content.strip():
-                        raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
-
-                if has_subtractive_selectors:
-                    html_content = html_tools.element_removal(subtractive_selectors, html_content)
-
-                if watch.is_source_type_url:
-                    stripped_text_from_html = html_content
-                else:
-                    # extract text
-                    do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
-                    stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
-                                                                      render_anchor_tag_content=do_anchor,
-                                                                      is_rss=is_rss)  # 1874 activate the <title workaround hack
+                stripped_text = html_content

+        # === TEXT TRANSFORMATIONS ===
        if watch.get('trim_text_whitespace'):
-            stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
+            stripped_text = transformer.trim_whitespace(stripped_text)

-        # Re #340 - return the content before the 'ignore text' was applied
-        # Also used to calculate/show what was removed
-        text_content_before_ignored_filter = stripped_text_from_html
-
-        # @todo whitespace coming from missing rtrim()?
-        # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
-        # Rewrite's the processing text based on only what diff result they want to see
+        # Save text before ignore filters (for diff calculation)
+        text_content_before_ignored_filter = stripped_text

+        # === DIFF FILTERING ===
+        # If user wants specific diff types (added/removed/replaced only)
        if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
-            # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
-            from changedetectionio import diff
-            # needs to not include (added) etc or it may get used twice
-            # Replace the processed text with the preferred result
-            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
-                                             newest_version_file_contents=stripped_text_from_html,
-                                             include_equal=False,  # not the same lines
-                                             include_added=watch.get('filter_text_added', True),
-                                             include_removed=watch.get('filter_text_removed', True),
-                                             include_replaced=watch.get('filter_text_replaced', True),
-                                             line_feed_sep="\n",
-                                             include_change_type_prefix=False)
+            stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter)
+            if stripped_text is None:
+                # No differences found, but content exists
+                c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True)
+                return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8')

-            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))
-
-            if not rendered_diff and stripped_text_from_html:
-                # We had some content, but no differences were found
-                # Store our new file as the MD5 so it will trigger in the future
-                c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
-                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
-            else:
-                stripped_text_from_html = rendered_diff
-
-        # Treat pages with no renderable text content as a change? No by default
+        # === EMPTY PAGE CHECK ===
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
-        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
-            raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
-                                                            status_code=self.fetcher.get_last_status_code(),
-                                                            screenshot=self.fetcher.screenshot,
-                                                            has_filters=has_filter_rule,
-                                                            html_content=html_content,
-                                                            xpath_data=self.fetcher.xpath_data
-                                                            )
-
-        # We rely on the actual text in the html output.. many sites have random script vars etc,
-        # in the future we'll implement other mechanisms.
+        if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0:
+            raise content_fetchers.exceptions.ReplyWithContentButNoText(
+                url=url,
+                status_code=self.fetcher.get_last_status_code(),
+                screenshot=self.fetcher.screenshot,
+                has_filters=filter_config.has_include_filters,
+                html_content=html_content,
+                xpath_data=self.fetcher.xpath_data
+            )

        update_obj["last_check_status"] = self.fetcher.get_last_status_code()

-        # 615 Extract text by regex
-        extract_text = list(dict.fromkeys(watch.get('extract_text', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')))
-        if len(extract_text) > 0:
-            regex_matched_output = []
-            for s_re in extract_text:
-                # incase they specified something in '/.../x'
-                if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
-                    regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
-                    result = re.findall(regex, stripped_text_from_html)
-
-                    for l in result:
-                        if type(l) is tuple:
-                            # @todo - some formatter option default (between groups)
-                            regex_matched_output += list(l) + ['\n']
-                        else:
-                            # @todo - some formatter option default (between each ungrouped result)
-                            regex_matched_output += [l] + ['\n']
-                else:
-                    # Doesnt look like regex, just hunt for plaintext and return that which matches
-                    # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
-                    r = re.compile(re.escape(s_re), re.IGNORECASE)
-                    res = r.findall(stripped_text_from_html)
-                    if res:
-                        for match in res:
-                            regex_matched_output += [match] + ['\n']
-
-            ##########################################################
-            stripped_text_from_html = ''
-
-            if regex_matched_output:
-                # @todo some formatter for presentation?
-                stripped_text_from_html = ''.join(regex_matched_output)
+        # === REGEX EXTRACTION ===
+        if filter_config.extract_text:
+            extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
+            stripped_text = extracted

+        # === MORE TEXT TRANSFORMATIONS ===
        if watch.get('remove_duplicate_lines'):
-            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
-
+            stripped_text = transformer.remove_duplicate_lines(stripped_text)

        if watch.get('sort_text_alphabetically'):
-            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
-            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
-            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
-            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
+            stripped_text = transformer.sort_alphabetically(stripped_text)

-### CALCULATE MD5
-        # If there's text to ignore
-        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
-        text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
+        # === CHECKSUM CALCULATION ===
+        text_for_checksuming = stripped_text

-        text_for_checksuming = stripped_text_from_html
-        if text_to_ignore:
-            text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
-            # Some people prefer to also completely remove it
-            strip_ignored_lines = watch.get('strip_ignored_lines') if watch.get('strip_ignored_lines') is not None else self.datastore.data['settings']['application'].get('strip_ignored_lines')
+        # Apply ignore_text for checksum calculation
+        if filter_config.ignore_text:
+            text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text)
+
+            # Optionally remove ignored lines from output
+            strip_ignored_lines = watch.get('strip_ignored_lines')
+            if strip_ignored_lines is None:
+                strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines')
            if strip_ignored_lines:
-                # @todo add test in the 'preview' mode, check the widget works? compare to datastruct
-                stripped_text_from_html = text_for_checksuming
+                stripped_text = text_for_checksuming

-        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
-        if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
-            fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
-        else:
-            fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()
+        # Calculate checksum
+        ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False)
+        fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace)

-        ############ Blocking rules, after checksum #################
+        # === BLOCKING RULES EVALUATION ===
        blocked = False
-        trigger_text = list(dict.fromkeys(watch.get('trigger_text', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')))
-        if len(trigger_text):
-            # Assume blocked
+
+        # Check trigger_text
+        if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
            blocked = True
-            # Filter and trigger works the same, so reuse it
-            # It should return the line numbers that match
-            # Unblock flow if the trigger was found (some text remained after stripped what didnt match)
-            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
-                                                  wordlist=trigger_text,
-                                                  mode="line numbers")
-            # Unblock if the trigger was found
-            if result:
-                blocked = False

-        text_should_not_be_present = list(dict.fromkeys(watch.get('text_should_not_be_present', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')))
-        if len(text_should_not_be_present):
-            # If anything matched, then we should block a change from happening
-            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
-                                                  wordlist=text_should_not_be_present,
-                                                  mode="line numbers")
-            if result:
-                blocked = True
+        # Check text_should_not_be_present
+        if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present):
+            blocked = True

-        # And check if 'conditions' will let this pass through
-        if watch.get('conditions') and watch.get('conditions_match_logic'):
-            conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
-                                                                    application_datastruct=self.datastore.data,
-                                                                    ephemeral_data={
-                                                                        'text': stripped_text_from_html
-                                                                    }
-                                                                    )
+        # Check custom conditions
+        if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text):
+            blocked = True

-            if not conditions_result.get('result'):
-                # Conditions say "Condition not met" so we block it.
-                blocked = True
-
-        # Looks like something changed, but did it match all the rules?
+        # === CHANGE DETECTION ===
        if blocked:
            changed_detected = False
        else:
-            # The main thing that all this at the moment comes down to :)
+            # Compare checksums
            if watch.get('previous_md5') != fetched_md5:
                changed_detected = True

            # Always record the new checksum
            update_obj["previous_md5"] = fetched_md5

-            # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
+            # On first run, initialize previous_md5
            if not watch.get('previous_md5'):
                watch['previous_md5'] = fetched_md5

        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

-        if changed_detected:
-            if watch.get('check_unique_lines', False):
-                ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
+        # === UNIQUE LINES CHECK ===
+        if changed_detected and watch.get('check_unique_lines', False):
+            has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
+                lines=stripped_text.splitlines(),
+                ignore_whitespace=ignore_whitespace
+            )

-                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
-                    lines=stripped_text_from_html.splitlines(),
-                    ignore_whitespace=ignore_whitespace
-                )
+            if not has_unique_lines:
+                logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
+                changed_detected = False
+            else:
+                logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")

-                # One or more lines? unsure?
-                if not has_unique_lines:
-                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
-                    changed_detected = False
-                else:
-                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
+        return changed_detected, update_obj, stripped_text

+    def _apply_diff_filtering(self, watch, stripped_text, text_before_filter):
+        """Apply user's diff filtering preferences (show only added/removed/replaced lines)."""
+        from changedetectionio import diff

-        # stripped_text_from_html - Everything after filters and NO 'ignored' content
-        return changed_detected, update_obj, stripped_text_from_html
+        rendered_diff = diff.render_diff(
+            previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
+            newest_version_file_contents=stripped_text,
+            include_equal=False,
+            include_added=watch.get('filter_text_added', True),
+            include_removed=watch.get('filter_text_removed', True),
+            include_replaced=watch.get('filter_text_replaced', True),
+            line_feed_sep="\n",
+            include_change_type_prefix=False
+        )
+
+        watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8'))
+
+        if not rendered_diff and stripped_text:
+            # No differences found
+            return None
+
+        return rendered_diff
--- a/changedetectionio/rss_tools.py
+++ b/changedetectionio/rss_tools.py
@@ -0,0 +1,130 @@
+"""
+RSS/Atom feed processing tools for changedetection.io
+"""
+
+from loguru import logger
+import re
+
+
+def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
+    """
+    Process CDATA sections in HTML/XML content - inline replacement.
+
+    Every ``<![CDATA[...]]>`` section is replaced by the text that ``html_to_text``
+    produces from its contents, XML-escaped and then stripped of surrounding
+    whitespace. Content outside CDATA sections is returned unchanged.
+
+    Args:
+        html_content: The HTML/XML content to process
+        render_anchor_tag_content: Whether to render anchor tag content
+
+    Returns:
+        Processed HTML/XML content with CDATA sections replaced inline
+    """
+    from xml.sax.saxutils import escape as xml_escape
+    from .html_tools import html_to_text
+
+    # Raw string so that \[ \s \] reach the regex engine as written instead of being
+    # treated as (invalid) string escape sequences, which newer Pythons warn about.
+    # NOTE(review): the nested quantifiers (\s* inside a repeated group where '.' can
+    # also match whitespace) may backtrack heavily on an unterminated CDATA section -
+    # consider a non-greedy '(.*?)' with re.DOTALL if that proves to be a problem.
+    pattern = r'<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
+
+    def repl(m):
+        # Group 1 is the raw content between '<![CDATA[' and the closing ']]>'
+        text = m.group(1)
+        return xml_escape(html_to_text(html_content=text, render_anchor_tag_content=render_anchor_tag_content)).strip()
+
+    return re.sub(pattern, repl, html_content)
+
+
+def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
+    """
+    Format RSS/Atom feed items in a readable text format using feedparser.
+
+    Converts RSS <item> or Atom <entry> elements to formatted text with:
+    - <title> → <h1>Title</h1>
+    - <link> → Link: [url]
+    - <guid> → Guid: [id]
+    - <pubDate> → PubDate: [date]
+    - <description> or <content> → Raw HTML content (CDATA and entities automatically handled)
+
+    Entries without any content/summary get an escaped "<none>" placeholder instead.
+    Each formatted entry is wrapped in a <div> carrying the classes "rss-item",
+    "item-N" (1-based) and, where applicable, "first" / "last".
+
+    Args:
+        rss_content: The RSS/Atom feed content
+        render_anchor_tag_content: Whether to render anchor tag content in descriptions (unused, kept for compatibility)
+
+    Returns:
+        Formatted HTML content ready for html_to_text conversion.
+        If anything raises while parsing or formatting, a warning is logged and
+        the original ``rss_content`` is returned unchanged.
+    """
+    try:
+        import feedparser
+        from xml.sax.saxutils import escape as xml_escape
+
+        # Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc.
+        feed = feedparser.parse(rss_content)
+
+        formatted_items = []
+
+        # Determine feed type for appropriate labels when fields are missing
+        # feedparser sets feed.version to things like 'rss20', 'atom10', etc.
+        is_atom = feed.version and 'atom' in feed.version
+
+        for entry in feed.entries:
+            item_parts = []
+
+            # Title - feedparser handles CDATA and entity unescaping automatically
+            if hasattr(entry, 'title') and entry.title:
+                item_parts.append(f'<h1>{xml_escape(entry.title)}</h1>')
+
+            # Link
+            if hasattr(entry, 'link') and entry.link:
+                item_parts.append(f'Link: {xml_escape(entry.link)}<br>')
+
+            # GUID/ID
+            if hasattr(entry, 'id') and entry.id:
+                item_parts.append(f'Guid: {xml_escape(entry.id)}<br>')
+
+            # Date - feedparser normalizes all date field names to 'published'
+            if hasattr(entry, 'published') and entry.published:
+                item_parts.append(f'PubDate: {xml_escape(entry.published)}<br>')
+
+            # Description/Content - feedparser handles CDATA and entity unescaping automatically
+            # Only add "Summary:" label for Atom <summary> tags
+            content = None
+            add_label = False
+
+            if hasattr(entry, 'content') and entry.content:
+                # Atom <content> - no label, just content
+                content = entry.content[0].value if entry.content[0].value else None
+            elif hasattr(entry, 'summary'):
+                # Could be RSS <description> or Atom <summary>
+                # feedparser maps both to entry.summary
+                content = entry.summary if entry.summary else None
+                # Only add "Summary:" label for Atom feeds (which use <summary> tag)
+                if is_atom:
+                    add_label = True
+
+            # Add content with or without label
+            # (content is inserted as-is, not XML-escaped, unlike the fields above)
+            if content:
+                if add_label:
+                    item_parts.append(f'Summary:<br>{content}')
+                else:
+                    item_parts.append(content)
+            else:
+                # No content - just show <none>
+                item_parts.append('&lt;none&gt;')
+
+            # Join all parts of this item
+            # (item_parts always holds at least the content or the <none> placeholder here)
+            if item_parts:
+                formatted_items.append('\n'.join(item_parts))
+
+        # Wrap each item in a div with classes (rss-item, first, last, item-N)
+        items_html = []
+        total_items = len(formatted_items)
+        for idx, item in enumerate(formatted_items):
+            classes = ['rss-item']
+            if idx == 0:
+                classes.append('first')
+            if idx == total_items - 1:
+                classes.append('last')
+            classes.append(f'item-{idx + 1}')
+
+            class_str = ' '.join(classes)
+            items_html.append(f'<div class="{class_str}">{item}</div>')
+        return '<html><body>\n'+"\n<br><br>".join(items_html)+'\n</body></html>'
+
+    except Exception as e:
+        # Deliberate best-effort: any failure falls back to the unformatted feed
+        logger.warning(f"Error formatting RSS items: {str(e)}")
+        # Fall back to original content
+        return rss_content
--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -344,7 +344,7 @@ label {
 }  
 }

-#notification-customisation {
+.grey-form-border {
  border: 1px solid var(--color-border-notification);
  padding: 0.5rem;
  border-radius: 5px;
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
--- a/changedetectionio/templates/_common_fields.html
+++ b/changedetectionio/templates/_common_fields.html
@@ -33,7 +33,7 @@
                                <div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div>
                            </div>
                        </div>
-                        <div id="notification-customisation" class="pure-control-group">
+                        <div class="pure-control-group grey-form-border">
                            <div class="pure-control-group">
                                {{ render_field(form.notification_title, class="m-d notification-title", placeholder=settings_application['notification_title']) }}
                                <span class="pure-form-message-inline">Title for all notifications</span>
--- a/changedetectionio/tests/conftest.py
+++ b/changedetectionio/tests/conftest.py
@@ -29,16 +29,28 @@ def reportlog(pytestconfig):
    logger.remove(handler_id)


+def format_memory_human(bytes_value):
+    """Render a byte count as a short human-readable string (B, KB, MB or GB)."""
+    kib = 1024
+    if bytes_value < kib:
+        # Below one KB the raw value is shown as-is, without decimals
+        return f"{bytes_value} B"
+    for power, suffix in ((1, "KB"), (2, "MB")):
+        if bytes_value < kib ** (power + 1):
+            return f"{bytes_value / kib ** power:.2f} {suffix}"
+    # Everything from 1024 ** 3 upwards is reported in GB
+    return f"{bytes_value / kib ** 3:.2f} GB"
+
 def track_memory(memory_usage, ):
+    """Sample this process's RSS in a loop until ``memory_usage["stop"]`` is truthy.
+
+    Keeps ``memory_usage["peak"]`` at the highest RSS seen so far and
+    ``memory_usage["current"]`` at the most recent sample.
+    """
     process = psutil.Process(os.getpid())
     while not memory_usage["stop"]:
         current_rss = process.memory_info().rss
         memory_usage["peak"] = max(memory_usage["peak"], current_rss)
+        memory_usage["current"] = current_rss  # Keep updating current
         time.sleep(0.01)  # Adjust the sleep time as needed

@pytest.fixture(scope='function')
 def measure_memory_usage(request):
-    memory_usage = {"peak": 0, "stop": False}
+    memory_usage = {"peak": 0, "current": 0, "stop": False}
    tracker_thread = Thread(target=track_memory, args=(memory_usage,))
    tracker_thread.start()

@@ -47,16 +59,17 @@ def measure_memory_usage(request):
    memory_usage["stop"] = True
    tracker_thread.join()

-    # Note: ru_maxrss is in kilobytes on Unix-based systems
-    max_memory_used = memory_usage["peak"] / 1024  # Convert to MB
-    s = f"Peak memory used by the test {request.node.fspath} - '{request.node.name}': {max_memory_used:.2f} MB"
+    # Note: psutil returns RSS memory in bytes
+    peak_human = format_memory_human(memory_usage["peak"])
+
+    s = f"{time.time()} {request.node.fspath} - '{request.node.name}' - Peak memory: {peak_human}"
    logger.debug(s)

    with open("test-memory.log", 'a') as f:
        f.write(f"{s}\n")

    # Assert that the memory usage is less than 200MB
-#    assert max_memory_used < 150, f"Memory usage exceeded 200MB: {max_memory_used:.2f} MB"
+#    assert memory_usage["peak"] < 150 * 1024 ** 2, f"Memory usage exceeded 150MB: {peak_human}"


 def cleanup(datastore_path):
--- a/changedetectionio/tests/custom_browser_url/test_custom_browser_url.py
+++ b/changedetectionio/tests/custom_browser_url/test_custom_browser_url.py
@@ -29,13 +29,8 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
    assert b"Settings updated." in res.data

    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    if make_test_use_extra_browser:
--- a/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py
+++ b/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py
@@ -2,7 +2,7 @@
 import json
 import os
 from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, delete_all_watches


 def set_response():
@@ -98,6 +98,5 @@ def test_socks5(client, live_server, measure_memory_usage):
    )
    assert b"OK" in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

--- a/changedetectionio/tests/smtp/test_notification_smtp.py
+++ b/changedetectionio/tests/smtp/test_notification_smtp.py
@@ -5,7 +5,7 @@ import re
 from flask import url_for
 from changedetectionio.tests.util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, \
    wait_for_all_checks, \
-    set_longer_modified_response
+    set_longer_modified_response, delete_all_watches
 from changedetectionio.tests.util import extract_UUID_from_client
 import logging
 import base64
@@ -85,8 +85,7 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
    assert '(added) So let\'s see what happens.\r\n' in msg  # The plaintext part with \r\n
    assert 'Content-Type: text/html' in msg
    assert '(added) So let\'s see what happens.<br>' in msg  # the html part
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage):
@@ -179,5 +178,4 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
    assert '&lt;' not in msg
    assert 'Content-Type: text/html' in msg

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_access_control.py
+++ b/changedetectionio/tests/test_access_control.py
@@ -2,7 +2,7 @@ from .util import live_server_setup, wait_for_all_checks
 from flask import url_for
 import time

-def test_check_access_control(app, client, live_server):
+def test_check_access_control(app, client, live_server, measure_memory_usage):
    # Still doesnt work, but this is closer.
   #  live_server_setup(live_server) # Setup on conftest per function

--- a/changedetectionio/tests/test_add_replace_remove_filter.py
+++ b/changedetectionio/tests/test_add_replace_remove_filter.py
@@ -3,7 +3,7 @@
 import os.path

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
+from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, delete_all_watches
 import time

 def set_original(excluding=None, add_line=None):
@@ -44,12 +44,8 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
    set_original()
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -107,14 +103,12 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):
    
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
    time.sleep(1)

    # Give the endpoint time to spin up
@@ -137,12 +131,8 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
    set_original()
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -187,5 +177,4 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
        assert b'-Oh yes please' in response
        assert '网站监测 内容更新了'.encode('utf-8') in response

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_api.py
+++ b/changedetectionio/tests/test_api.py
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches

 import json
 import uuid
@@ -276,8 +276,7 @@ def test_access_denied(client, live_server, measure_memory_usage):
    assert res.status_code == 200

    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

    res = client.post(
        url_for("settings.settings_page"),
@@ -385,8 +384,7 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
    assert b'Additional properties are not allowed' in res.data

    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_api_import(client, live_server, measure_memory_usage):
--- a/changedetectionio/tests/test_api_notifications.py
+++ b/changedetectionio/tests/test_api_notifications.py
@@ -4,7 +4,7 @@ from flask import url_for
 from .util import live_server_setup
 import json

-def test_api_notifications_crud(client, live_server):
+def test_api_notifications_crud(client, live_server, measure_memory_usage):
   #  live_server_setup(live_server) # Setup on conftest per function
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

--- a/changedetectionio/tests/test_api_search.py
+++ b/changedetectionio/tests/test_api_search.py
@@ -6,7 +6,7 @@ import time
 from .util import live_server_setup, wait_for_all_checks


-def test_api_search(client, live_server):
+def test_api_search(client, live_server, measure_memory_usage):
   #  live_server_setup(live_server) # Setup on conftest per function
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

--- a/changedetectionio/tests/test_auth.py
+++ b/changedetectionio/tests/test_auth.py
@@ -12,12 +12,8 @@ def test_basic_auth(client, live_server, measure_memory_usage):
    # This page will echo back any auth info
    test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@")
    time.sleep(1)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    time.sleep(1)
    # Check form validation
--- a/changedetectionio/tests/test_automatic_follow_ldjson_price.py
+++ b/changedetectionio/tests/test_automatic_follow_ldjson_price.py
@@ -86,12 +86,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Should get a notice that it's available
@@ -129,12 +125,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'ldjson-price-track-offer' not in res.data
@@ -146,12 +138,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
 def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data):

    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    for k,v in client.application.config.get('DATASTORE').data['watching'].items():
--- a/changedetectionio/tests/test_backend.py
+++ b/changedetectionio/tests/test_backend.py
@@ -3,7 +3,7 @@
 import time
 from flask import url_for
 from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
-    extract_UUID_from_client
+    extract_UUID_from_client, delete_all_watches

 sleep_time_for_fetch_thread = 3

@@ -163,10 +163,23 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

    #
    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_non_text_mime_or_downloads(client, live_server, measure_memory_usage):
+    """
+
+    https://github.com/dgtlmoon/changedetection.io/issues/3434
+    I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8,
+    but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog),
+    changedetection somehow ignores all line breaks and treats the document file as if everything is on one line.
+
+    WHAT THIS DOES - makes the system rely on 'magic' to determine what is it
+
+    :param client:
+    :param live_server:
+    :param measure_memory_usage:
+    :return:
+    """
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("""some random text that should be split by line
 and not parsed with html_to_text
@@ -179,13 +192,8 @@ got it\r\n
    test_url = url_for('test_endpoint', content_type="application/octet-stream", _external=True)

    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

@@ -213,5 +221,121 @@ got it\r\n
    assert b"some random text that should be split by line\n" in res.data


-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    delete_all_watches(client)

+
+def test_standard_text_plain(client, live_server, measure_memory_usage):
+    """
+    Content served as text/plain must be kept as plain text, not processed as HTML.
+
+    Related to https://github.com/dgtlmoon/changedetection.io/issues/3434
+    (that issue describes application/octet-stream responses losing their line breaks;
+    this test covers the text/plain case).
+
+    The real bug here can be that it will try to process plain-text as HTML, losing <etc>
+
+    The endpoint content below is served with content-type text/plain. The test then checks
+    that the preview page still contains the first line followed by a linefeed, and that the
+    latest snapshot fetched through the API keeps both that linefeed and the literal
+    <title>...</title> tag.
+
+    :param client:
+    :param live_server:
+    :param measure_memory_usage:
+    :return:
+    """
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""some random text that should be split by line
+and not parsed with html_to_text
+<title>Even this title should stay because we are just plain text</title>
+this way we know that it correctly parsed as plain text
+\r\n
+ok\r\n
+got it\r\n
+""")
+
+    test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
+
+    # Add the watch directly to the datastore, then queue a recheck
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+
+    wait_for_all_checks(client)
+
+    ### check the front end
+    res = client.get(
+        url_for("ui.ui_views.preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    assert b"some random text that should be split by line\n" in res.data
+    ####
+
+    # Check the snapshot by API that it has linefeeds too
+    watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+    res = client.get(
+        url_for("watchhistory", uuid=watch_uuid),
+        headers={'x-api-key': api_key},
+    )
+
+    # Fetch a snapshot by timestamp (the last key of the history response), check the right one was found
+    res = client.get(
+        url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]),
+        headers={'x-api-key': api_key},
+    )
+    assert b"some random text that should be split by line\n" in res.data
+    assert b"<title>Even this title should stay because we are just plain text</title>" in res.data
+
+    delete_all_watches(client)
+
+# Server says its plaintext, we should always treat it as plaintext
+def test_plaintext_even_if_xml_content(client, live_server, measure_memory_usage):
+    """
+    XML-looking content served as text/plain should be shown as plain text.
+
+    The preview page is expected to contain the <string ...> element as HTML-escaped
+    text (``&lt;string name=&#34;...``), i.e. the markup itself is still present.
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""<?xml version="1.0" encoding="utf-8"?>
+<resources xmlns:tools="http://schemas.android.com/tools">
+    <!--Activity and fragment titles-->
+    <string name="feed_update_receiver_name">Abonnementen bijwerken</string>
+</resources>
+""")
+
+    test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
+
+    # Add the watch directly to the datastore, then queue a recheck
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+
+    wait_for_all_checks(client)
+
+    res = client.get(
+        url_for("ui.ui_views.preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    # The tag must survive, escaped for display in the preview page
+    assert b'&lt;string name=&#34;feed_update_receiver_name&#34;' in res.data
+
+    delete_all_watches(client)
+
+# Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that
+def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server, measure_memory_usage):
+    """
+    XML-looking content served as text/plain can still be narrowed with include filters.
+
+    The watch is created with ``include_filters=['//string']``; the preview page is then
+    expected to contain the escaped <string ...> element but not the <foobar> element.
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""<?xml version="1.0" encoding="utf-8"?>
+<resources xmlns:tools="http://schemas.android.com/tools">
+    <!--Activity and fragment titles-->
+    <string name="feed_update_receiver_name">Abonnementen bijwerken</string>
+    <foobar>ok man</foobar>
+</resources>
+""")
+
+    test_url=url_for('test_endpoint', content_type="text/plain", _external=True)
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": ['//string']})
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+
+    res = client.get(
+        url_for("ui.ui_views.preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    # Matched by the filter: kept (escaped for display)
+    assert b'&lt;string name=&#34;feed_update_receiver_name&#34;' in res.data
+    # Not matched by the filter: dropped
+    assert b'&lt;foobar' not in res.data
+
+    # Same cleanup helper as the other tests in this module
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_block_while_text_present.py
+++ b/changedetectionio/tests/test_block_while_text_present.py
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches
 from changedetectionio import html_tools

 def set_original_ignore_response():
@@ -70,12 +70,8 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -144,5 +140,4 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu



-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_clone.py
+++ b/changedetectionio/tests/test_clone.py
@@ -14,12 +14,8 @@ def test_clone_functionality(client, live_server, measure_memory_usage):
    test_url = url_for('test_endpoint', _external=True)

    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # So that we can be sure the same history doesnt carry over
--- a/changedetectionio/tests/test_conditions.py
+++ b/changedetectionio/tests/test_conditions.py
@@ -3,7 +3,7 @@ import json
 import time

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches
 from ..model import CONDITIONS_MATCH_LOGIC_DEFAULT


@@ -47,11 +47,11 @@ def set_number_out_of_range_response(number="150"):
        f.write(test_return_data)


-# def test_setup(client, live_server):
+# def test_setup(client, live_server, measure_memory_usage):
    """Test that both text and number conditions work together with AND logic."""
   #  live_server_setup(live_server) # Setup on conftest per function

-def test_conditions_with_text_and_number(client, live_server):
+def test_conditions_with_text_and_number(client, live_server, measure_memory_usage):
    """Test that both text and number conditions work together with AND logic."""
    
    set_original_response("50")
@@ -60,12 +60,8 @@ def test_conditions_with_text_and_number(client, live_server):
    test_url = url_for('test_endpoint', _external=True)

    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Configure the watch with two conditions connected with AND:
@@ -143,23 +139,18 @@ def test_conditions_with_text_and_number(client, live_server):
    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' not in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 # The 'validate' button next to each rule row
-def test_condition_validate_rule_row(client, live_server):
+def test_condition_validate_rule_row(client, live_server, measure_memory_usage):

    set_original_response("50")

    test_url = url_for('test_endpoint', _external=True)

    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
@@ -230,12 +221,8 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
--- a/changedetectionio/tests/test_css_selector.py
+++ b/changedetectionio/tests/test_css_selector.py
@@ -81,12 +81,8 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)
@@ -138,12 +134,8 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Goto the edit page, add our ignore text
@@ -193,12 +185,8 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Goto the edit page, add our ignore text
--- a/changedetectionio/tests/test_element_removal.py
+++ b/changedetectionio/tests/test_element_removal.py
@@ -5,7 +5,7 @@ import time
 from flask import url_for

 from ..html_tools import *
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches



@@ -209,48 +209,32 @@ def test_element_removal_full(client, live_server, measure_memory_usage):

 # Re #2752
 def test_element_removal_nth_offset_no_shift(client, live_server, measure_memory_usage):
-    

    set_response_with_multiple_index()
-    subtractive_selectors_data = ["""
-body > table > tr:nth-child(1) > th:nth-child(2)
+    subtractive_selectors_data = [
+### css style ###
+"""body > table > tr:nth-child(1) > th:nth-child(2)
 body > table >  tr:nth-child(2) > td:nth-child(2)
 body > table > tr:nth-child(3) > td:nth-child(2)
 body > table > tr:nth-child(1) > th:nth-child(3)
 body > table >  tr:nth-child(2) > td:nth-child(3)
 body > table > tr:nth-child(3) > td:nth-child(3)""",
+### xpath style ###
 """//body/table/tr[1]/th[2]
 //body/table/tr[2]/td[2]
 //body/table/tr[3]/td[2]
 //body/table/tr[1]/th[3]
 //body/table/tr[2]/td[3]
 //body/table/tr[3]/td[3]"""]
+    
+    test_url = url_for("test_endpoint", _external=True)

    for selector_list in subtractive_selectors_data:

-        res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-        assert b'Deleted' in res.data
+        delete_all_watches(client)

-        # Add our URL to the import page
-        test_url = url_for("test_endpoint", _external=True)
-        res = client.post(
-            url_for("imports.import_page"), data={"urls": test_url}, follow_redirects=True
-        )
-        assert b"1 Imported" in res.data
-        wait_for_all_checks(client)
-
-        res = client.post(
-            url_for("ui.ui_edit.edit_page", uuid="first"),
-            data={
-                "subtractive_selectors": selector_list,
-                "url": test_url,
-                "tags": "",
-                "fetch_backend": "html_requests",
-                "time_between_check_use_default": "y",
-            },
-            follow_redirects=True,
-        )
-        assert b"Updated watch." in res.data
+        uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"subtractive_selectors": selector_list.splitlines()})
+        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

        res = client.get(
@@ -258,6 +242,7 @@ body > table > tr:nth-child(3) > td:nth-child(3)""",
            follow_redirects=True
        )

+        # The selectors above should have removed these values; none of them targets the "emil" column.
        assert b"Tobias" not in res.data
        assert b"Linus" not in res.data
        assert b"Person 2" not in res.data
--- a/changedetectionio/tests/test_encoding.py
+++ b/changedetectionio/tests/test_encoding.py
@@ -28,11 +28,8 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', content_type="text/html", _external=True)
-    client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -59,11 +56,8 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

--- a/changedetectionio/tests/test_errorhandling.py
+++ b/changedetectionio/tests/test_errorhandling.py
@@ -3,7 +3,7 @@
 import time

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches



@@ -19,12 +19,8 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text):
                       status_code=http_code,
                       _external=True)

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -47,8 +43,7 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text):
    #assert b'Error Screenshot' in res.data


-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_http_error_handler(client, live_server, measure_memory_usage):
@@ -56,8 +51,7 @@ def test_http_error_handler(client, live_server, measure_memory_usage):
    _runner_test_http_errors(client, live_server, 404, 'Page not found')
    _runner_test_http_errors(client, live_server, 500, '(Internal server error) received')
    _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400')
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 # Just to be sure error text is properly handled
 def test_DNS_errors(client, live_server, measure_memory_usage):
@@ -87,8 +81,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage):
    assert found_name_resolution_error
    # Should always record that we tried
    assert bytes("just now".encode('utf-8')) in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 # Re 1513
 def test_low_level_errors_clear_correctly(client, live_server, measure_memory_usage):
@@ -145,5 +138,4 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us
    )
    assert not found_name_resolution_error

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_extract_regex.py
+++ b/changedetectionio/tests/test_extract_regex.py
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches

 from ..html_tools import *

@@ -76,12 +76,8 @@ def test_check_filter_multiline(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

@@ -131,12 +127,8 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -212,12 +204,8 @@ def test_regex_error_handling(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    ### test regex error handling
    res = client.post(
@@ -231,5 +219,4 @@ def test_regex_error_handling(client, live_server, measure_memory_usage):

    assert b'is not a valid regular expression.' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_filter_failure_notification.py
+++ b/changedetectionio/tests/test_filter_failure_notification.py
@@ -42,13 +42,8 @@ def run_filter_test(client, live_server, content_filter):
    if os.path.isfile("test-datastore/notification.txt"):
        os.unlink("test-datastore/notification.txt")

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
--- a/changedetectionio/tests/test_group.py
+++ b/changedetectionio/tests/test_group.py
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client
+from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
 import os


@@ -127,8 +127,7 @@ def test_setup_group_tag(client, live_server, measure_memory_usage):
    assert b"should-be-excluded" not in res.data
    assert res.status_code == 200
    assert b"first-imported=1" in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_tag_import_singular(client, live_server, measure_memory_usage):
    
@@ -147,8 +146,7 @@ def test_tag_import_singular(client, live_server, measure_memory_usage):
    )
    # Should be only 1 tag because they both had the same
    assert res.data.count(b'test-tag') == 1
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_tag_add_in_ui(client, live_server, measure_memory_usage):
    
@@ -164,8 +162,7 @@ def test_tag_add_in_ui(client, live_server, measure_memory_usage):
    res = client.get(url_for("tags.delete_all"), follow_redirects=True)
    assert b'All tags deleted' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_group_tag_notification(client, live_server, measure_memory_usage):
    
@@ -232,8 +229,7 @@ def test_group_tag_notification(client, live_server, measure_memory_usage):

    #@todo Test that multiple notifications fired
    #@todo Test that each of multiple notifications with different settings
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_limit_tag_ui(client, live_server, measure_memory_usage):

@@ -264,15 +260,12 @@ def test_limit_tag_ui(client, live_server, measure_memory_usage):
    client.get(url_for('ui.mark_all_viewed', tag=tag_uuid), follow_redirects=True)
    wait_for_all_checks(client)

-    with open('/tmp/fuck.html', 'wb') as f:
-        f.write(res.data)
    # Should be only 1 unviewed
    res = client.get(url_for("watchlist.index"))
    assert res.data.count(b' unviewed ') == 1


-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
    res = client.get(url_for("tags.delete_all"), follow_redirects=True)
    assert b'All tags deleted' in res.data

@@ -299,8 +292,7 @@ def test_clone_tag_on_import(client, live_server, measure_memory_usage):
    # 2 times plus the top link to tag
    assert res.data.count(b'test-tag') == 3
    assert res.data.count(b'another-tag') == 3
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usage):
    
@@ -327,8 +319,7 @@ def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usa
    # 2 times plus the top link to tag
    assert res.data.count(b'test-tag') == 3
    assert res.data.count(b'another-tag') == 3
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

    res = client.get(url_for("tags.delete_all"), follow_redirects=True)
    assert b'All tags deleted' in res.data
@@ -391,12 +382,8 @@ def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measu
        f.write(d)

    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    filters = [
@@ -482,5 +469,4 @@ the {test} appeared before. {test in res.data[:n]=}
        """
        n += t_index + len(test)

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_history_consistency.py
+++ b/changedetectionio/tests/test_history_consistency.py
@@ -3,9 +3,8 @@
 import time
 import os
 import json
-import logging
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import wait_for_all_checks, delete_all_watches
 from urllib.parse import urlparse, parse_qs

 def test_consistent_history(client, live_server, measure_memory_usage):
@@ -81,19 +80,15 @@ def test_consistent_history(client, live_server, measure_memory_usage):
        assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"


-def test_check_text_history_view(client, live_server):
+def test_check_text_history_view(client, live_server, measure_memory_usage):

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("<html>test-one</html>")

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -122,5 +117,4 @@ def test_check_text_history_view(client, live_server):
    assert b'test-two' in res.data
    assert b'test-one' not in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_ignore.py
+++ b/changedetectionio/tests/test_ignore.py
@@ -27,12 +27,8 @@ def test_ignore(client, live_server, measure_memory_usage):
   #  live_server_setup(live_server) # Setup on conftest per function
    set_original_ignore_response()
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -79,12 +75,8 @@ def test_strip_ignore_lines(client, live_server, measure_memory_usage):
    assert b"Settings updated." in res.data

    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches
 from changedetectionio import html_tools


@@ -97,12 +97,8 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -163,8 +159,7 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
    # it is only ignored, it is not removed (it will be highlighted too)
    assert b'new ignore stuff' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 # When adding some ignore text, it should not trigger a change, even if something else on that line changes
 def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
@@ -192,12 +187,8 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
        # Switch to source mode so we can test that too!
        test_url = "source:"+test_url

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -251,13 +242,12 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

-def test_check_global_ignore_text_functionality(client, live_server):
+def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
    
    _run_test_global_ignore(client, as_source=False)

-def test_check_global_ignore_text_functionality_as_source(client, live_server):
+def test_check_global_ignore_text_functionality_as_source(client, live_server, measure_memory_usage):
    
    _run_test_global_ignore(client, as_source=True, extra_ignore='/\?v=\d/')
--- a/changedetectionio/tests/test_ignorehyperlinks.py
+++ b/changedetectionio/tests/test_ignorehyperlinks.py
@@ -3,9 +3,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
-
-
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches


 def set_original_ignore_response():
@@ -117,7 +115,5 @@ def test_render_anchor_tag_content_true(client, live_server, measure_memory_usag
    assert b"/test-endpoint" in res.data

    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"),
-                     follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

--- a/changedetectionio/tests/test_ignorestatuscode.py
+++ b/changedetectionio/tests/test_ignorestatuscode.py
@@ -60,12 +60,8 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, me

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

@@ -94,12 +90,8 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measu

    # Add our URL to the import page
    test_url = url_for('test_endpoint', status_code=403, _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)
--- a/changedetectionio/tests/test_ignorewhitespace.py
+++ b/changedetectionio/tests/test_ignorewhitespace.py
@@ -70,12 +70,8 @@ def test_check_ignore_whitespace(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    time.sleep(sleep_time_for_fetch_thread)
    # Trigger a check
--- a/changedetectionio/tests/test_import.py
+++ b/changedetectionio/tests/test_import.py
@@ -5,7 +5,7 @@ import time

 from flask import url_for

-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches


 # def test_setup(client, live_server, measure_memory_usage):
@@ -28,7 +28,7 @@ https://example.com tag1, other tag"""
    assert b"3 Imported" in res.data
    assert b"tag1" in res.data
    assert b"other tag" in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    delete_all_watches(client)

    # Clear flask alerts
    res = client.get( url_for("watchlist.index"))
@@ -53,7 +53,7 @@ def xtest_import_skip_url(client, live_server, measure_memory_usage):
    assert b"1 Imported" in res.data
    assert b"ht000000broken" in res.data
    assert b"1 Skipped" in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    delete_all_watches(client)
    # Clear flask alerts
    res = client.get( url_for("watchlist.index"))

@@ -119,7 +119,7 @@ def test_import_distillio(client, live_server, measure_memory_usage):
    assert b"nice stuff" in res.data
    assert b"nerd-news" in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    delete_all_watches(client)
    # Clear flask alerts
    res = client.get(url_for("watchlist.index"))

@@ -169,8 +169,7 @@ def test_import_custom_xlsx(client, live_server, measure_memory_usage):
            assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]'
            assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0}

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_import_watchete_xlsx(client, live_server, measure_memory_usage):
    """Test can upload a excel spreadsheet and the watches are created correctly"""
@@ -214,5 +213,4 @@ def test_import_watchete_xlsx(client, live_server, measure_memory_usage):
        if watch.get('title') == 'system default website':
            assert watch.get('fetch_backend') == 'system' # uses default if blank

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -3,7 +3,7 @@

 import time
 from flask import url_for, escape
-from . util import live_server_setup, wait_for_all_checks
+from . util import live_server_setup, wait_for_all_checks, delete_all_watches
 import pytest
 jq_support = True

@@ -113,14 +113,8 @@ def set_original_ext_response():
    return None

 def set_modified_ext_response():
-    data = """
-    [
-    {
-        "isPriceLowered": false,
-        "status": "Sold",
-        "statusOrig": "sold"
-    },
-    {
+    # This should get reformatted
+    data = """ [ { "isPriceLowered": false,  "status": "Sold",  "statusOrig": "sold" }, {
        "_id": "5e7b3e1fb3262d306323ff1e",
        "listingsType": "consumer",
        "isPriceLowered": false,
@@ -205,16 +199,10 @@ def test_check_json_without_filter(client, live_server, measure_memory_usage):
    # and be sure it doesn't get chewed up by instriptis
    set_json_response_with_html()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
    # Add our URL to the import page
    test_url = url_for('test_endpoint', content_type="application/json", _external=True)
-    client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -228,45 +216,23 @@ def test_check_json_without_filter(client, live_server, measure_memory_usage):
    assert b'&#34;html&#34;: &#34;&lt;b&gt;&#34;' in res.data
    assert res.data.count(b'{') >= 2

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def check_json_filter(json_filter, client, live_server):
    set_original_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', content_type="application/json", _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": json_filter.splitlines()})
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)

-    # Goto the edit page, add our ignore text
-    # Add our URL to the import page
-    res = client.post(
-        url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"include_filters": json_filter,
-              "url": test_url,
-              "tags": "",
-              "headers": "",
-              "fetch_backend": "html_requests",
-              "time_between_check_use_default": "y"
-              },
-        follow_redirects=True
-    )
-    assert b"Updated watch." in res.data
-
    # Check it saved
    res = client.get(
-        url_for("ui.ui_edit.edit_page", uuid="first"),
+        url_for("ui.ui_edit.edit_page", uuid=uuid),
    )
    assert bytes(escape(json_filter).encode('utf-8')) in res.data

@@ -285,14 +251,13 @@ def check_json_filter(json_filter, client, live_server):
    assert b'has-unread-changes' in res.data

    # Should not see this, because its not in the JSONPath we entered
-    res = client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))
+    res = client.get(url_for("ui.ui_views.diff_history_page", uuid=uuid))

    # But the change should be there, tho its hard to test the change was detected because it will show old and new versions
    # And #462 - check we see the proper utf-8 string there
    assert "Örnsköldsvik".encode('utf-8') in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_check_jsonpath_filter(client, live_server, measure_memory_usage):
    check_json_filter('json:boss.name', client, live_server)
@@ -308,36 +273,12 @@ def test_check_jqraw_filter(client, live_server, measure_memory_usage):
 def check_json_filter_bool_val(json_filter, client, live_server):
    set_original_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
    test_url = url_for('test_endpoint', content_type="application/json", _external=True)

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
-
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": [json_filter]})
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
-    # Goto the edit page, add our ignore text
-    # Add our URL to the import page
-    res = client.post(
-        url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"include_filters": json_filter,
-              "url": test_url,
-              "tags": "",
-              "headers": "",
-              "fetch_backend": "html_requests",
-              "time_between_check_use_default": "y"
-              },
-        follow_redirects=True
-    )
-    assert b"Updated watch." in res.data

-    # Give the thread time to pick it up
-    wait_for_all_checks(client)
    #  Make a change
    set_modified_response()

@@ -350,8 +291,7 @@ def check_json_filter_bool_val(json_filter, client, live_server):
    # But the change should be there, tho its hard to test the change was detected because it will show old and new versions
    assert b'false' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_check_jsonpath_filter_bool_val(client, live_server, measure_memory_usage):
    check_json_filter_bool_val("json:$['available']", client, live_server)
@@ -372,25 +312,16 @@ def test_check_jqraw_filter_bool_val(client, live_server, measure_memory_usage):
 def check_json_ext_filter(json_filter, client, live_server):
    set_original_ext_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
    # Add our URL to the import page
    test_url = url_for('test_endpoint', content_type="application/json", _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
-
-    # Give the thread time to pick it up
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
    res = client.post(
-        url_for("ui.ui_edit.edit_page", uuid="first"),
+        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data={"include_filters": json_filter,
              "url": test_url,
              "tags": "",
@@ -404,7 +335,7 @@ def check_json_ext_filter(json_filter, client, live_server):

    # Check it saved
    res = client.get(
-        url_for("ui.ui_edit.edit_page", uuid="first"),
+        url_for("ui.ui_edit.edit_page", uuid=uuid),
    )
    assert bytes(escape(json_filter).encode('utf-8')) in res.data

@@ -418,6 +349,12 @@ def check_json_ext_filter(json_filter, client, live_server):
    # Give the thread time to pick it up
    wait_for_all_checks(client)

+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    dates = list(watch.history.keys())
+    snapshot_contents = watch.get_history_snapshot(dates[0])
+
+    assert snapshot_contents[0] == '['
+
    # It should have 'has-unread-changes'
    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' in res.data
@@ -436,8 +373,7 @@ def check_json_ext_filter(json_filter, client, live_server):
    assert b'ForSale' in res.data
    assert b'Sold' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_ignore_json_order(client, live_server, measure_memory_usage):
    # A change in order shouldn't trigger a notification
@@ -448,12 +384,8 @@ def test_ignore_json_order(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', content_type="application/json", _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

@@ -478,24 +410,19 @@ def test_ignore_json_order(client, live_server, measure_memory_usage):
    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_correct_header_detect(client, live_server, measure_memory_usage):
    # Like in https://github.com/dgtlmoon/changedetection.io/pull/1593
    # Specify extra html that JSON is sometimes wrapped in - when using SockpuppetBrowser / Puppeteer / Playwrightetc
    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write('<html><body>{"hello" : 123, "world": 123}')
+        f.write('<html><body>{ "world": 123, "hello" : 123}')

    # Add our URL to the import page
    # Check weird casing is cleaned up and detected also
    test_url = url_for('test_endpoint', content_type="aPPlication/JSon", uppercase_headers=True, _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))

@@ -507,11 +434,20 @@ def test_correct_header_detect(client, live_server, measure_memory_usage):
        follow_redirects=True
    )

-    assert b'&#34;hello&#34;: 123,' in res.data
-    assert b'&#34;world&#34;: 123' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    dates = list(watch.history.keys())
+    snapshot_contents = watch.get_history_snapshot(dates[0])
+
+    assert b'&#34;hello&#34;: 123,' in res.data # properly html escaped in the front end
+
+    # Snapshot should be pretty-printed with sorted keys ("world" was written first, so it moves to the end)
+    assert snapshot_contents == """{
+    "hello": 123,
+    "world": 123
+}"""
+
+    delete_all_watches(client)

 def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage):
    check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
--- a/changedetectionio/tests/test_live_preview.py
+++ b/changedetectionio/tests/test_live_preview.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3

 from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, delete_all_watches


 def set_response():
@@ -75,5 +75,4 @@ def test_content_filter_live_preview(client, live_server, measure_memory_usage):
    assert reply.get('ignore_line_numbers') == [2]  # Ignored - "socks" on line 2
    assert reply.get('trigger_line_numbers') == [1]  # Triggers "Awesome" in line 1

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_nonrenderable_pages.py
+++ b/changedetectionio/tests/test_nonrenderable_pages.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3

 from flask import url_for
-from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, delete_all_watches
 import time


@@ -113,6 +113,5 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

    #
    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

--- a/changedetectionio/tests/test_obfuscations.py
+++ b/changedetectionio/tests/test_obfuscations.py
@@ -24,12 +24,8 @@ def test_obfuscations(client, live_server, measure_memory_usage):
    time.sleep(1)
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(3)
--- a/changedetectionio/tests/test_pdf.py
+++ b/changedetectionio/tests/test_pdf.py
@@ -8,30 +8,30 @@ from .util import set_original_response, set_modified_response, live_server_setu
 # `subtractive_selectors` should still work in `source:` type requests
 def test_fetch_pdf(client, live_server, measure_memory_usage):
    import shutil
+    import os
+
    shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")
+    first_version_size = os.path.getsize("test-datastore/endpoint-test.pdf")

-   #  live_server_setup(live_server) # Setup on conftest per function
    test_url = url_for('test_pdf_endpoint', _external=True)
-    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
-
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

-    res = client.get(
-        url_for("ui.ui_views.preview_page", uuid="first"),
-        follow_redirects=True
-    )
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    dates = list(watch.history.keys())
+    snapshot_contents = watch.get_history_snapshot(dates[0])

    # PDF header should not be there (it was converted to text)
-    assert b'PDF' not in res.data[:10]
-    assert b'hello world' in res.data
+    assert 'PDF' not in snapshot_contents
+    # Was converted away from HTML
+    assert 'pdftohtml' not in snapshot_contents.lower() # Generator tag shouldnt be there
+    assert f'Original file size - {first_version_size}' in snapshot_contents
+    assert 'html' not in snapshot_contents.lower() # is converted from html
+    assert 'body' not in snapshot_contents.lower()  # is converted from html
+    # And our text content was there
+    assert 'hello world' in snapshot_contents

    # So we know if the file changes in other ways
    import hashlib
@@ -39,8 +39,7 @@ def test_fetch_pdf(client, live_server, measure_memory_usage):
    # We should have one
    assert len(original_md5) >0
    # And it's going to be in the document
-    assert b'Document checksum - '+bytes(str(original_md5).encode('utf-8')) in res.data
-
+    assert f'Document checksum - {original_md5}' in snapshot_contents

    shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf")
    changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
@@ -63,7 +62,6 @@ def test_fetch_pdf(client, live_server, measure_memory_usage):
    assert original_md5.encode('utf-8') not in res.data
    assert changed_md5.encode('utf-8') in res.data

-
    res = client.get(
        url_for("ui.ui_views.diff_history_page", uuid="first"),
        follow_redirects=True
@@ -71,6 +69,16 @@ def test_fetch_pdf(client, live_server, measure_memory_usage):

    assert original_md5.encode('utf-8') in res.data
    assert changed_md5.encode('utf-8') in res.data
-
    assert b'here is a change' in res.data
+
+
+    dates = list(watch.history.keys())
+    # The new snapshot should also be OK and contain no HTML
+    snapshot_contents = watch.get_history_snapshot(dates[1])
+    assert 'html' not in snapshot_contents.lower()
+    assert f'Original file size - {os.path.getsize("test-datastore/endpoint-test.pdf")}' in snapshot_contents
+    assert f'here is a change' in snapshot_contents
+    assert os.path.getsize("test-datastore/endpoint-test.pdf") != first_version_size # And the disk change worked
+
+
    
--- a/changedetectionio/tests/test_preview_endpoints.py
+++ b/changedetectionio/tests/test_preview_endpoints.py
@@ -13,13 +13,8 @@ def test_fetch_pdf(client, live_server, measure_memory_usage):
   #  live_server_setup(live_server) # Setup on conftest per function
    test_url = url_for('test_pdf_endpoint', _external=True)
    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

--- a/changedetectionio/tests/test_request.py
+++ b/changedetectionio/tests/test_request.py
@@ -2,7 +2,7 @@ import json
 import os
 import time
 from flask import url_for
-from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client, delete_all_watches



@@ -17,21 +17,13 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
        test_url = test_url.replace('localhost', 'changedet')

    # Add the test URL twice, we will check
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)
    cookie_header = '_ga=GA1.2.1022228332; cookie-preferences=analytics:accepted;'
@@ -82,8 +74,7 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
    for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
        assert 'custom' in watch.get('remote_server_reply') # added in util.py

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_body_in_request(client, live_server, measure_memory_usage):

@@ -93,12 +84,8 @@ def test_body_in_request(client, live_server, measure_memory_usage):
        # Because its no longer calling back to localhost but from the browser container, set in test-only.yml
        test_url = test_url.replace('localhost', 'cdio')

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

@@ -150,12 +137,8 @@ def test_body_in_request(client, live_server, measure_memory_usage):

    ####### data sanity checks
    # Add the test URL twice, we will check
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    watches_with_body = 0
    with open('test-datastore/url-watches.json') as f:
@@ -180,8 +163,7 @@ def test_body_in_request(client, live_server, measure_memory_usage):
        follow_redirects=True
    )
    assert b"Body must be empty when Request Method is set to GET" in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_method_in_request(client, live_server, measure_memory_usage):
    # Add our URL to the import page
@@ -191,20 +173,12 @@ def test_method_in_request(client, live_server, measure_memory_usage):
        test_url = test_url.replace('localhost', 'cdio')

    # Add the test URL twice, we will check
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

@@ -258,8 +232,7 @@ def test_method_in_request(client, live_server, measure_memory_usage):
    # Should be only one with method set to PATCH
    assert watches_with_method == 1

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 # Re #2408 - user-agent override test, also should handle case-insensitive header deduplication
 def test_ua_global_override(client, live_server, measure_memory_usage):
@@ -277,12 +250,8 @@ def test_ua_global_override(client, live_server, measure_memory_usage):
    )
    assert b'Settings updated' in res.data

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)
    res = client.get(
@@ -315,8 +284,7 @@ def test_ua_global_override(client, live_server, measure_memory_usage):
    )
    assert b"agent-from-watch" in res.data
    assert b"html-requests-user-agent" not in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
    
@@ -356,12 +324,8 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
    assert b"requests-default_ua-html_requests" in res.data

    # Add the test URL twice, we will check
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

@@ -429,19 +393,14 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
        assert "User-Agent:".encode('utf-8') + requests_ua.encode('utf-8') in res.data

    # unlink headers.txt on start/stop
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

-def test_headers_validation(client, live_server):
+def test_headers_validation(client, live_server, measure_memory_usage):
    

    test_url = url_for('test_headers', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
--- a/changedetectionio/tests/test_restock_itemprop.py
+++ b/changedetectionio/tests/test_restock_itemprop.py
@@ -3,7 +3,7 @@ import os
 import time

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, extract_UUID_from_client
+from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, extract_UUID_from_client, delete_all_watches
 from ..notification import default_notification_format

 instock_props = [
@@ -44,11 +44,11 @@ def set_original_response(props_markup='', price="121.95"):



-# def test_setup(client, live_server):
+# def test_setup(client, live_server, measure_memory_usage):

   #  live_server_setup(live_server) # Setup on conftest per function

-def test_restock_itemprop_basic(client, live_server):
+def test_restock_itemprop_basic(client, live_server, measure_memory_usage):

    

@@ -69,8 +69,7 @@ def test_restock_itemprop_basic(client, live_server):
        assert b'has-restock-info' in res.data
        assert b' in-stock' in res.data
        assert b' not-in-stock' not in res.data
-        res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-        assert b'Deleted' in res.data
+        delete_all_watches(client)


    for p in out_of_stock_props:
@@ -85,10 +84,9 @@ def test_restock_itemprop_basic(client, live_server):

        assert b'has-restock-info not-in-stock' in res.data

-        res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-        assert b'Deleted' in res.data
+        delete_all_watches(client)

-def test_itemprop_price_change(client, live_server):
+def test_itemprop_price_change(client, live_server, measure_memory_usage):
    

    # Out of the box 'Follow price changes' should be ON
@@ -132,13 +130,11 @@ def test_itemprop_price_change(client, live_server):
    assert b'has-unread-changes' not in res.data


-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def _run_test_minmax_limit(client, extra_watch_edit_form):

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

    test_url = url_for('test_endpoint', _external=True)

@@ -212,11 +208,10 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
    assert b'1,890.45' in res.data or b'1890.45' in res.data
    assert b'has-unread-changes' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


-def test_restock_itemprop_minmax(client, live_server):
+def test_restock_itemprop_minmax(client, live_server, measure_memory_usage):
    
    extras = {
        "restock_settings-follow_price_changes": "y",
@@ -225,7 +220,7 @@ def test_restock_itemprop_minmax(client, live_server):
    }
    _run_test_minmax_limit(client, extra_watch_edit_form=extras)

-def test_restock_itemprop_with_tag(client, live_server):
+def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage):
    

    res = client.post(
@@ -254,11 +249,10 @@ def test_restock_itemprop_with_tag(client, live_server):



-def test_itemprop_percent_threshold(client, live_server):
+def test_itemprop_percent_threshold(client, live_server, measure_memory_usage):
    

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

    test_url = url_for('test_endpoint', _external=True)

@@ -317,12 +311,11 @@ def test_itemprop_percent_threshold(client, live_server):



-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)



-def test_change_with_notification_values(client, live_server):
+def test_change_with_notification_values(client, live_server, measure_memory_usage):
    

    if os.path.isfile("test-datastore/notification.txt"):
@@ -390,11 +383,10 @@ def test_change_with_notification_values(client, live_server):
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"


-def test_data_sanity(client, live_server):
+def test_data_sanity(client, live_server, measure_memory_usage):
    

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

    test_url = url_for('test_endpoint', _external=True)
    test_url2 = url_for('test_endpoint2', _external=True)
@@ -421,8 +413,7 @@ def test_data_sanity(client, live_server):
    assert str(res.data.decode()).count("950.95") == 1, "Price should only show once (for the watch added, no other watches yet)"

    ## different test, check the edit page works on an empty request result
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

    client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
@@ -435,11 +426,10 @@ def test_data_sanity(client, live_server):
        url_for("ui.ui_edit.edit_page", uuid="first"))
    assert test_url2.encode('utf-8') in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 # All examples should give a prive of 666.66
-def test_special_prop_examples(client, live_server):
+def test_special_prop_examples(client, live_server, measure_memory_usage):
    import glob
    

--- a/changedetectionio/tests/test_rss.py
+++ b/changedetectionio/tests/test_rss.py
@@ -3,7 +3,7 @@
 import time
 from flask import url_for
 from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
-    extract_UUID_from_client
+    extract_UUID_from_client, delete_all_watches


 def set_original_cdata_xml():
@@ -110,17 +110,13 @@ def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage):
    

    set_original_cdata_xml()
-
-    test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
+    # Endpoints rarely send the right header (usually just text/xml), so we also check for <rss
+    # This also triggers the automatic CDATA text parser, so the RSS comes back as a nice content list
+    test_url = url_for('test_endpoint', content_type="text/xml; charset=UTF-8", _external=True)

    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

@@ -132,14 +128,14 @@ def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage):
    assert b'<![' not in res.data
    assert b'Hackers can access your computer' in res.data
    assert b'The days of Terminator' in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    delete_all_watches(client)

 def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
    

    set_original_cdata_xml()

-    test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
+    test_url = url_for('test_endpoint', content_type="application/atom+xml; charset=UTF-8", _external=True)

    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
@@ -180,10 +176,10 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
    assert b'The days of Terminator' not in res.data # Should NOT be selected by the xpath
    assert b'Some other description' not in res.data  # Should NOT be selected by the xpath

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    delete_all_watches(client)


-def test_rss_bad_chars_breaking(client, live_server):
+def test_rss_bad_chars_breaking(client, live_server, measure_memory_usage):
    """This should absolutely trigger the RSS builder to go into worst state mode

    - source: prefix means no html conversion (which kinda filters out the bad stuff)
--- a/changedetectionio/tests/test_rss_reader_mode.py
+++ b/changedetectionio/tests/test_rss_reader_mode.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+
+import time
+from flask import url_for
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
+    extract_UUID_from_client, delete_all_watches
+
+
+def set_original_cdata_xml():
+    test_return_data = """<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
+<channel>
+<title>Security Bulletins on wetscale</title>
+<link>https://wetscale.com/security-bulletins/</link>
+<description>Recent security bulletins from wetscale</description>
+<lastBuildDate>Fri, 10 Oct 2025 14:58:11 GMT</lastBuildDate>
+<docs>https://validator.w3.org/feed/docs/rss2.html</docs>
+<generator>wetscale.com</generator>
+<language>en-US</language>
+<copyright>© 2025 wetscale Inc. All rights reserved.</copyright>
+<atom:link href="https://wetscale.com/security-bulletins/index.xml" rel="self" type="application/rss+xml"/>
+<item>
+<title>TS-2025-005</title>
+<link>https://wetscale.com/security-bulletins/#ts-2025-005</link>
+<guid>https://wetscale.com/security-bulletins/#ts-2025-005</guid>
+<pubDate>Thu, 07 Aug 2025 00:00:00 GMT</pubDate>
+<description><p>Wet noodles escape<br><p>they also found themselves outside</p> </description>
+</item>
+
+
+<item>
+<title>TS-2025-004</title>
+<link>https://wetscale.com/security-bulletins/#ts-2025-004</link>
+<guid>https://wetscale.com/security-bulletins/#ts-2025-004</guid>
+<pubDate>Tue, 27 May 2025 00:00:00 GMT</pubDate>
+<description>
+    <![CDATA[ <img class="type:primaryImage" src="https://testsite.com/701c981da04869e.jpg"/><p>The days of Terminator and The Matrix could be closer. But be positive.</p><p><a href="https://testsite.com">Read more link...</a></p> ]]>
+</description>
+</item>
+    </channel>
+    </rss>
+            """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+
+
+
+def test_rss_reader_mode(client, live_server, measure_memory_usage):
+    set_original_cdata_xml()
+
+    # Endpoints rarely send the right header (usually just text/xml), so we also check for <rss
+    # This also triggers the automatic CDATA text parser, so the RSS comes back as a nice content list
+    test_url = url_for('test_endpoint', content_type="text/xml; charset=UTF-8", _external=True)
+    live_server.app.config['DATASTORE'].data['settings']['application']['rss_reader_mode'] = True
+
+
+    # Add the watch directly to the datastore, then call the form_watch_checknow endpoint
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+
+    wait_for_all_checks(client)
+
+
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    dates = list(watch.history.keys())
+    snapshot_contents = watch.get_history_snapshot(dates[0])
+    assert 'Wet noodles escape' in snapshot_contents
+    assert '<br>' not in snapshot_contents
+    assert '&lt;' not in snapshot_contents
+    assert 'The days of Terminator and The Matrix' in snapshot_contents
+    assert 'PubDate: Thu, 07 Aug 2025 00:00:00 GMT' in snapshot_contents
+    delete_all_watches(client)
+
+def test_rss_reader_mode_with_css_filters(client, live_server, measure_memory_usage):
+    set_original_cdata_xml()
+
+    # Endpoints rarely send the right header (usually just text/xml), so we also check for <rss
+    # This also triggers the automatic CDATA text parser, so the RSS comes back as a nice content list
+    test_url = url_for('test_endpoint', content_type="text/xml; charset=UTF-8", _external=True)
+    live_server.app.config['DATASTORE'].data['settings']['application']['rss_reader_mode'] = True
+
+
+    # Add the watch (with a CSS include filter) directly to the datastore, then call the form_watch_checknow endpoint
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={'include_filters': [".last"]})
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+
+    wait_for_all_checks(client)
+
+
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    dates = list(watch.history.keys())
+    snapshot_contents = watch.get_history_snapshot(dates[0])
+    assert 'Wet noodles escape' not in snapshot_contents
+    assert '<br>' not in snapshot_contents
+    assert '&lt;' not in snapshot_contents
+    assert 'The days of Terminator and The Matrix' in snapshot_contents
+    delete_all_watches(client)
+
--- a/changedetectionio/tests/test_scheduler.py
+++ b/changedetectionio/tests/test_scheduler.py
@@ -5,11 +5,11 @@ from copy import copy
 from datetime import datetime, timezone
 from zoneinfo import ZoneInfo
 from flask import url_for
-from .util import  live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from .util import  live_server_setup, wait_for_all_checks, extract_UUID_from_client, delete_all_watches
 from ..forms import REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT, REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT


-# def test_setup(client, live_server):
+# def test_setup(client, live_server, measure_memory_usage):
   #  live_server_setup(live_server) # Setup on conftest per function

 def test_check_basic_scheduler_functionality(client, live_server, measure_memory_usage):
@@ -34,13 +34,8 @@ def test_check_basic_scheduler_functionality(client, live_server, measure_memory
    res = client.get(url_for("settings.settings_page"))
    assert b'Pacific/Kiritimati' in res.data

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))

@@ -92,8 +87,7 @@ def test_check_basic_scheduler_functionality(client, live_server, measure_memory
    assert live_server.app.config['DATASTORE'].data['watching'][uuid]['last_checked'] != last_check

    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_check_basic_global_scheduler_functionality(client, live_server, measure_memory_usage):
@@ -101,13 +95,8 @@ def test_check_basic_global_scheduler_functionality(client, live_server, measure
    days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
    test_url = url_for('test_random_content_endpoint', _external=True)

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))

@@ -180,18 +169,13 @@ def test_check_basic_global_scheduler_functionality(client, live_server, measure
    assert live_server.app.config['DATASTORE'].data['watching'][uuid]['last_checked'] != last_check

    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_validation_time_interval_field(client, live_server, measure_memory_usage):
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)


    res = client.post(
--- a/changedetectionio/tests/test_security.py
+++ b/changedetectionio/tests/test_security.py
@@ -1,7 +1,7 @@
 import os

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches
 from .. import strtobool


@@ -100,8 +100,7 @@ def _runner_test_various_file_slash(client, file_uri):
            # This will give some error from requests or if it went to chrome, will give some other error :-)
            assert any(s in res.data for s in substrings)

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_file_slash_access(client, live_server, measure_memory_usage):
    
--- a/changedetectionio/tests/test_share_watch.py
+++ b/changedetectionio/tests/test_share_watch.py
@@ -3,7 +3,7 @@
 import time
 from flask import url_for
 from urllib.request import urlopen
-from .util import set_original_response, set_modified_response, live_server_setup
+from .util import set_original_response, set_modified_response, live_server_setup, delete_all_watches
 import re

 sleep_time_for_fetch_thread = 3
@@ -17,13 +17,8 @@ def test_share_watch(client, live_server, measure_memory_usage):
    include_filters = ".nice-filter"

    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
@@ -54,8 +49,7 @@ def test_share_watch(client, live_server, measure_memory_usage):

    # Now delete what we have, we will try to re-import it
    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

    # Add our URL to the import page
    res = client.post(
--- a/changedetectionio/tests/test_source.py
+++ b/changedetectionio/tests/test_source.py
@@ -13,13 +13,8 @@ def test_check_basic_change_detection_functionality_source(client, live_server,
    set_original_response()
    test_url = 'source:'+url_for('test_endpoint', _external=True)
    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    time.sleep(sleep_time_for_fetch_thread)

@@ -62,13 +57,8 @@ def test_check_ignore_elements(client, live_server, measure_memory_usage):
    time.sleep(1)
    test_url = 'source:'+url_for('test_endpoint', _external=True)
    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

--- a/changedetectionio/tests/test_trigger.py
+++ b/changedetectionio/tests/test_trigger.py
@@ -65,12 +65,8 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Trigger a check
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
--- a/changedetectionio/tests/test_trigger_regex.py
+++ b/changedetectionio/tests/test_trigger_regex.py
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches


 def set_original_ignore_response():
@@ -30,12 +30,8 @@ def test_trigger_regex_functionality(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -76,5 +72,4 @@ def test_trigger_regex_functionality(client, live_server, measure_memory_usage):
    assert b'has-unread-changes' in res.data

    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_trigger_regex_with_filter.py
+++ b/changedetectionio/tests/test_trigger_regex_with_filter.py
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from . util import live_server_setup
+from . util import live_server_setup, delete_all_watches


 def set_original_ignore_response():
@@ -34,12 +34,8 @@ def test_trigger_regex_functionality_with_filter(client, live_server, measure_me

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # it needs time to save the original version
    time.sleep(sleep_time_for_fetch_thread)
@@ -81,5 +77,4 @@ def test_trigger_regex_functionality_with_filter(client, live_server, measure_me
    assert b'has-unread-changes' in res.data

 # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_ui.py
+++ b/changedetectionio/tests/test_ui.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3

 from flask import url_for
-from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, delete_all_watches
 from ..forms import REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT, REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT


-def test_recheck_time_field_validation_global_settings(client, live_server):
+def test_recheck_time_field_validation_global_settings(client, live_server, measure_memory_usage):
    """
    Tests that the global settings time field has atleast one value for week/day/hours/minute/seconds etc entered
    class globalSettingsRequestForm(Form):
@@ -27,7 +27,7 @@ def test_recheck_time_field_validation_global_settings(client, live_server):
    assert REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT.encode('utf-8') in res.data


-def test_recheck_time_field_validation_single_watch(client, live_server):
+def test_recheck_time_field_validation_single_watch(client, live_server, measure_memory_usage):
    """
    Tests that the global settings time field has atleast one value for week/day/hours/minute/seconds etc entered
    class globalSettingsRequestForm(Form):
@@ -36,13 +36,8 @@ def test_recheck_time_field_validation_single_watch(client, live_server):
    test_url = url_for('test_endpoint', _external=True)

    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
@@ -100,7 +95,7 @@ def test_recheck_time_field_validation_single_watch(client, live_server):
    assert b"Updated watch." in res.data
    assert REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT.encode('utf-8') not in res.data

-def test_checkbox_open_diff_in_new_tab(client, live_server):
+def test_checkbox_open_diff_in_new_tab(client, live_server, measure_memory_usage):
    
    set_original_response()
    # Add our URL to the import page
@@ -171,10 +166,9 @@ def test_checkbox_open_diff_in_new_tab(client, live_server):
    assert 'target=' not in target_line

    # Cleanup everything
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

-def test_page_title_listing_behaviour(client, live_server):
+def test_page_title_listing_behaviour(client, live_server, measure_memory_usage):

    set_original_response(extra_title="custom html")

@@ -249,7 +243,7 @@ def test_page_title_listing_behaviour(client, live_server):
    assert b"head titlecustom html" in res.data


-def test_ui_viewed_unread_flag(client, live_server):
+def test_ui_viewed_unread_flag(client, live_server, measure_memory_usage):

    import time

--- a/changedetectionio/tests/test_unique_lines.py
+++ b/changedetectionio/tests/test_unique_lines.py
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, delete_all_watches


 def set_original_ignore_response():
@@ -79,12 +79,8 @@ def test_unique_lines_functionality(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Add our URL to the import page
@@ -118,8 +114,7 @@ def test_unique_lines_functionality(client, live_server, measure_memory_usage):
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_sort_lines_functionality(client, live_server, measure_memory_usage):
    
@@ -128,12 +123,8 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Add our URL to the import page
@@ -168,8 +159,7 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):
    assert res.data.find(b'A uppercase') < res.data.find(b'Z last')
    assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines')
    
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_extra_filters(client, live_server, measure_memory_usage):
@@ -179,12 +169,8 @@ def test_extra_filters(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Add our URL to the import page
@@ -216,5 +202,4 @@ def test_extra_filters(client, live_server, measure_memory_usage):
    # still should remain unsorted ('A - sortable line') stays at the end
    assert res.data.find(b'A - sortable line') > res.data.find(b'Which is across multiple lines')

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)
--- a/changedetectionio/tests/test_watch_fields_storage.py
+++ b/changedetectionio/tests/test_watch_fields_storage.py
@@ -10,12 +10,8 @@ def test_check_watch_field_storage(client, live_server, measure_memory_usage):

    test_url = "http://somerandomsitewewatch.com"

-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)


    res = client.post(
--- a/changedetectionio/tests/test_xpath_selector.py
+++ b/changedetectionio/tests/test_xpath_selector.py
@@ -1,12 +1,42 @@
 # -*- coding: utf-8 -*-

-import time
+
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
-
-from ..html_tools import *
+from .util import  wait_for_all_checks, delete_all_watches
+from ..processors.magic import RSS_XML_CONTENT_TYPES


+def set_rss_atom_feed_response(header=''):
+    """Write a one-item RSS 2.0 feed, prefixed by `header` (e.g. an XML declaration), to the test endpoint file."""
+    test_return_data = f"""{header}<!-- Generated on Wed, 08 Oct 2025 08:42:33 -0700, really really honestly  -->
+<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
+<channel>
+    <atom:link href="https://store.waterpowered.com/news/collection//" rel="self" type="application/rss+xml"/>
+    <title>RSS Feed</title>
+    <link>
+        <![CDATA[ https://store.waterpowered.com/news/collection// ]]>
+    </link>
+    <description>
+        <![CDATA[ Events and Announcements for ]]>
+    </description>
+    <language>en-us</language>
+    <generator>water News RSS</generator>
+    <item>
+        <title> 🍁 Lets go discount</title>
+        <description><p class="bb_paragraph">ok heres the description</p></description>
+        <link>
+        <![CDATA[ https://store.waterpowered.com/news/app/1643320/view/511845698831908921 ]]>
+        </link>
+        <pubDate>Wed, 08 Oct 2025 15:28:55 +0000</pubDate>
+        <guid isPermaLink="true">https://store.waterpowered.com/news/app/1643320/view/511845698831908921</guid>
+        <enclosure url="https://clan.fastly.waterstatic.com/images/40721482/42822e5f00b2becf520ace9500981bb56f3a89f2.jpg" length="0" type="image/jpeg"/>
+    </item>
+</channel>
+</rss>"""
+
+    # Explicit UTF-8: the feed contains a non-ASCII emoji, so do not depend on the locale's default encoding
+    with open("test-datastore/endpoint-content.txt", "w", encoding="utf-8") as f:
+        f.write(test_return_data)



@@ -83,12 +113,8 @@ def test_check_xpath_filter_utf8(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
@@ -99,8 +125,7 @@ def test_check_xpath_filter_utf8(client, live_server, measure_memory_usage):
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'Unicode strings with encoding declaration are not supported.' not in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 # Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
@@ -137,12 +162,8 @@ def test_check_xpath_text_function_utf8(client, live_server, measure_memory_usag

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
@@ -163,8 +184,7 @@ def test_check_xpath_text_function_utf8(client, live_server, measure_memory_usag
    assert b'Stock Alert (UK): RPi CM4' in res.data
    assert b'Stock Alert (UK): Big monitor' in res.data

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_check_markup_xpath_filter_restriction(client, live_server, measure_memory_usage):
@@ -174,12 +194,8 @@ def test_check_markup_xpath_filter_restriction(client, live_server, measure_memo

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)
@@ -209,19 +225,14 @@ def test_check_markup_xpath_filter_restriction(client, live_server, measure_memo

    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' not in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_xpath_validation(client, live_server, measure_memory_usage):
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.post(
@@ -230,19 +241,14 @@ def test_xpath_validation(client, live_server, measure_memory_usage):
        follow_redirects=True
    )
    assert b"is not a valid XPath expression" in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_xpath23_prefix_validation(client, live_server, measure_memory_usage):
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.post(
@@ -251,8 +257,7 @@ def test_xpath23_prefix_validation(client, live_server, measure_memory_usage):
        follow_redirects=True
    )
    assert b"is not a valid XPath expression" in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

 def test_xpath1_lxml(client, live_server, measure_memory_usage):
    
@@ -287,12 +292,8 @@ def test_xpath1_lxml(client, live_server, measure_memory_usage):


    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.post(
@@ -321,12 +322,8 @@ def test_xpath1_lxml(client, live_server, measure_memory_usage):
 def test_xpath1_validation(client, live_server, measure_memory_usage):
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.post(
@@ -335,25 +332,19 @@ def test_xpath1_validation(client, live_server, measure_memory_usage):
        follow_redirects=True
    )
    assert b"is not a valid XPath expression" in res.data
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 # actually only really used by the distll.io importer, but could be handy too
 def test_check_with_prefix_include_filters(client, live_server, measure_memory_usage):
-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)

    set_original_response()
    wait_for_all_checks(client)
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.post(
@@ -398,12 +389,8 @@ def test_various_rules(client, live_server, measure_memory_usage):
    """)

    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    for r in ['//div', '//a', 'xpath://div', 'xpath://a']:
@@ -422,18 +409,13 @@ def test_various_rules(client, live_server, measure_memory_usage):
        res = client.get(url_for("watchlist.index"))
        assert b'fetch-error' not in res.data, f"Should not see errors after '{r} filter"

-    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    delete_all_watches(client)


 def test_xpath_20(client, live_server, measure_memory_usage):
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    set_original_response()
@@ -469,12 +451,8 @@ def test_xpath_20_function_count(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.post(
@@ -506,12 +484,8 @@ def test_xpath_20_function_count2(client, live_server, measure_memory_usage):

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.post(
@@ -543,16 +517,12 @@ def test_xpath_20_function_string_join_matches(client, live_server, measure_memo

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
+    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.post(
-        url_for("ui.ui_edit.edit_page", uuid="first"),
+        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data={
            "include_filters": "xpath:string-join(//*[contains(@class, 'sametext')]|//*[matches(@class, 'changetext')], 'specialconjunction')",
            "url": test_url,
@@ -567,7 +537,7 @@ def test_xpath_20_function_string_join_matches(client, live_server, measure_memo
    wait_for_all_checks(client)

    res = client.get(
-        url_for("ui.ui_views.preview_page", uuid="first"),
+        url_for("ui.ui_views.preview_page", uuid=uuid),
        follow_redirects=True
    )

@@ -575,3 +545,47 @@ def test_xpath_20_function_string_join_matches(client, live_server, measure_memo

    client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)

+
+def _subtest_xpath_rss(client, content_type='text/html'):
+    """Serve the prepared feed as `content_type` and check that the 'xpath://item' filter selects only the item."""
+    # Quick-add the watch in paused state so the filter can be set before the first fetch
+    test_url = url_for('test_endpoint', content_type=content_type, _external=True)
+    res = client.post(
+        url_for("ui.ui_views.form_quick_watch_add"),
+        data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
+        follow_redirects=True
+    )
+
+    assert b"Watch added in Paused state, saving will unpause" in res.data
+    # Save the xpath filter and unpause in the same request
+    res = client.post(
+        url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
+        data={
+            "url": test_url,
+            "include_filters": "xpath://item",
+            "tags": '',
+            "fetch_backend": "html_requests",
+            "time_between_check_use_default": "y",
+        },
+        follow_redirects=True
+    )
+
+    assert b"unpaused" in res.data
+    wait_for_all_checks(client)
+
+    res = client.get(
+        url_for("ui.ui_views.preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    assert b"Lets go discount" in res.data, f"Item title missing from preview with content type '{content_type}'"
+    assert b"Events and Announcements" not in res.data, f"Channel description leaked into preview with content type '{content_type}'" # It is outside our selector target
+
+    delete_all_watches(client)
+
+# Be sure xpath filters work on RSS for every content type in RSS_XML_CONTENT_TYPES, with and without an XML declaration
+def test_rss_xpath(client, live_server, measure_memory_usage):
+    for feed_header in ['', '<?xml version="1.0" encoding="utf-8"?>']:
+        set_rss_atom_feed_response(header=feed_header)
+        for content_type in RSS_XML_CONTENT_TYPES:
+            _subtest_xpath_rss(client, content_type=content_type)
--- a/changedetectionio/tests/util.py
+++ b/changedetectionio/tests/util.py
@@ -127,6 +127,11 @@ def extract_UUID_from_client(client):
    uuid = m.group(1)
    return uuid.strip()

+def delete_all_watches(client=None):
+    """Delete every watch listed under ``watching`` in the application's datastore.
+
+    :param client: object whose ``application.config`` holds the ``DATASTORE``
+        entry. Despite the ``None`` default it must be supplied, because the
+        body dereferences ``client.application``.
+    """
+    datastore = client.application.config.get('DATASTORE')
+    # Work from a snapshot of the UUIDs instead of iterating the live 'watching' container
+    # (presumably delete() removes entries from it - confirm against the datastore).
+    watch_uuids = list(datastore.data['watching'])
+    for watch_uuid in watch_uuids:
+        datastore.delete(watch_uuid)
+

 def wait_for_all_checks(client=None):
    """
@@ -135,8 +140,6 @@ def wait_for_all_checks(client=None):
    """
    from changedetectionio.flask_app import update_q as global_update_q
    from changedetectionio import worker_handler
-
-    logger = logging.getLogger()
    empty_since = None
    attempt = 0
    max_attempts = 150  # Still reasonable upper bound
@@ -144,9 +147,9 @@ def wait_for_all_checks(client=None):
    while attempt < max_attempts:
        # Start with fast checks, slow down if needed
        if attempt < 10:
-            time.sleep(0.1)  # Very fast initial checks
+            time.sleep(0.2)  # Very fast initial checks
        elif attempt < 30:
-            time.sleep(0.3)  # Medium speed
+            time.sleep(0.4)  # Medium speed
        else:
            time.sleep(0.8)  # Slower for persistent issues

@@ -322,4 +325,3 @@ def new_live_server_setup(live_server):
        return resp

    live_server.start()
-
--- a/changedetectionio/tests/visualselector/test_fetch_data.py
+++ b/changedetectionio/tests/visualselector/test_fetch_data.py
@@ -4,7 +4,7 @@ import os
 from flask import url_for
 from ..util import live_server_setup, wait_for_all_checks

-# def test_setup(client, live_server):
+# def test_setup(client, live_server, measure_memory_usage):
   #  live_server_setup(live_server) # Setup on conftest per function


@@ -142,7 +142,7 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
    assert b"testheader: yes" in res.data
    assert b"user-agent: mycustomagent" in res.data

-def test_non_200_errors_report_browsersteps(client, live_server):
+def test_non_200_errors_report_browsersteps(client, live_server, measure_memory_usage):


    four_o_four_url =  url_for('test_endpoint', status_code=404, _external=True)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 # eventlet>=0.38.0  # Removed - replaced with threading mode for better Python 3.12+ compatibility
 feedgen~=0.9
+feedparser~=6.0  # For parsing RSS/Atom feeds
 flask-compress
 # 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers)
 flask-login>=0.6.3
@@ -12,7 +13,7 @@ flask_wtf~=1.2
 flask~=2.3
 flask-socketio~=5.5.1
 python-socketio~=5.13.0
-python-engineio~=4.12.0
+python-engineio~=4.12.3
 inscriptis~=2.2
 pytz
 timeago~=1.0
@@ -135,7 +136,7 @@ tzdata
 pluggy ~= 1.5

 # Needed for testing, cross-platform for process and system monitoring
-psutil==7.0.0
+psutil==7.1.0

 ruff >= 0.11.2
 pre_commit >= 4.2.0
Author	SHA1	Message	Date
dgtlmoon	0d2df7685d	bump text	2025-10-10 17:36:28 +02:00
dgtlmoon	1f0811e54d	Also support RDF	2025-10-10 17:34:22 +02:00
dgtlmoon	bb35310b07	format tweak	2025-10-10 17:29:20 +02:00
dgtlmoon	709dadc492	Ability to apply filters (first, last etc)	2025-10-10 17:26:20 +02:00
dgtlmoon	f02fb7406d	Feature - RSS reader mode	2025-10-10 17:11:59 +02:00
dgtlmoon	d3725da2dc	Merge branch 'master' into rss-reader-mode	2025-10-10 16:29:39 +02:00
dgtlmoon	bb6d4c2756	Re #3486 - Fixing and adding test for RSS/Atom not being converted to text when server sends "text/xml" instead of the "application/atom+xml" header (#3487 )	2025-10-10 16:29:02 +02:00
dgtlmoon	a72b13964d	Adding 'rss reader mode'	2025-10-10 16:28:20 +02:00
dgtlmoon	b59ce190ac	Ensure JSON is always correctly reformatted with padding (#3485 #3482 )	2025-10-10 16:00:32 +02:00
dgtlmoon	80be1a30f2	No need to reformat/reprocess content in the case that no filters were found (#3484 , #3483 ) Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2025-10-10 13:44:49 +02:00
dgtlmoon	93b4f79006	0.50.20	2025-10-10 10:40:04 +02:00
dgtlmoon	3009e46617	PDF - Will trigger a change - Fixing output, also reported original size of document was incorrect (it was the size of the HTML output after conversion from PDF), Improving tests (#3481 )	2025-10-10 10:38:34 +02:00
dgtlmoon	8f040a1a84	0.50.19 Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2025-10-10 01:17:57 +02:00
dgtlmoon	4dbab8d77a	Test speedup - remove common calls for function calls (#3477 )	2025-10-10 01:16:03 +02:00
dgtlmoon	cde42c8a49	Reducing memory usage (#3476 ) Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2025-10-09 18:31:19 +02:00
dgtlmoon	3b9d19df43	Refactoring text/html difference processor (#3475 )	2025-10-09 18:30:53 +02:00
dgtlmoon	6ad4acc9fc	0.50.18 Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details	2025-10-09 12:57:10 +02:00
dgtlmoon	3e59521f48	Always follow plaintext header over the actual content type if its available (#3473 ) #3472	2025-10-09 12:56:32 +02:00
dependabot[bot]	0970c087c8	Bump github/codeql-action from 3 to 4 in the all group (#3468 )	2025-10-09 10:47:12 +02:00
dependabot[bot]	676c550e6e	(Realtime updates) Update python-engineio requirement from ~=4.12.0 to ~=4.12.3 (#3467 ) Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details CodeQL / Analyze (javascript) (push) Has been cancelled Details CodeQL / Analyze (python) (push) Has been cancelled Details	2025-10-09 01:15:36 +02:00
dependabot[bot]	78fa47f6f8	Bump psutil from 7.0.0 to 7.1.0 (#3469 )	2025-10-09 00:50:53 +02:00
dgtlmoon	4aa5bb6da3	0.50.17	2025-10-09 00:19:02 +02:00
dgtlmoon	f7dfc9bbb8	Refactor content type detection, fixing more xpath issues for RSS types (#3465 ) #3462 #3391	2025-10-09 00:14:28 +02:00
dgtlmoon	584b6e378d	Dependabot tweaks	2025-10-09 00:03:13 +02:00
dgtlmoon	754febfd33	0.50.16 Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2025-10-06 15:39:23 +02:00
dgtlmoon	0c9c475f32	Fixing bad detection of text text/plain in previous release, adding automated test (#3460 )	2025-10-06 15:39:07 +02:00
dgtlmoon	e4baca1127	0.50.15 Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2025-10-06 09:14:14 +02:00
dgtlmoon	bb61a35a54	Build - Fixing the multi platform container build test (repairs to cache) (#3455 ) Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2025-10-03 17:22:16 +02:00