Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2026-02-19 20:56:01 +00:00)

Compare commits: restock-va ... backup-res (26 commits)

| SHA1 |
|---|
| e1b048f961 |
| 9ba645d4cc |
| e6c0d538e6 |
| e2fffc36e4 |
| b9a2f781ac |
| 76abb4ab71 |
| bca35f680e |
| fafea1b5c6 |
| 93630e188d |
| 7e99d748b9 |
| 352c91c619 |
| a6e55aaba9 |
| 25a17bd49d |
| 954582a581 |
| d8ef86a8b5 |
| 8711d29861 |
| 2343ddd88a |
| c6d6ef0e0c |
| 23063ad8a1 |
| 27b8a2d178 |
| a53f2a784d |
| 7558ca5fda |
| 383c3b427f |
| b01ba5d8a1 |
| 86e5184cef |
| 1dbf1f5db5 |
.github/nginx-reverse-proxy-test.conf (vendored, new file, 33 lines)
@@ -0,0 +1,33 @@
server {
    listen 80;
    server_name localhost;

    # Test basic reverse proxy to changedetection.io
    location / {
        proxy_pass http://changedet-app:5000;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # WebSocket support
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }

    # Test subpath deployment with X-Forwarded-Prefix
    location /changedet-sub/ {
        proxy_pass http://changedet-app:5000/;
        proxy_set_header X-Forwarded-Prefix /changedet-sub;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # WebSocket support
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }
}
.github/workflows/test-stack-reusable-workflow.yml (vendored, +169 lines)
@@ -324,6 +324,175 @@ jobs:
run: |
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'

nginx-reverse-proxy:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6

- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp

- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar

- name: Spin up services
run: |
docker network create changedet-network

# Start changedetection.io container with X-Forwarded headers support
docker run --name changedet-app --hostname changedet-app --network changedet-network \
-e USE_X_SETTINGS=true \
-d test-changedetectionio
sleep 3

- name: Start nginx reverse proxy
run: |
# Start nginx with our test configuration
docker run --name nginx-proxy --network changedet-network -d -p 8080:80 --rm \
-v ${{ github.workspace }}/.github/nginx-reverse-proxy-test.conf:/etc/nginx/conf.d/default.conf:ro \
nginx:alpine
sleep 2

- name: Test reverse proxy - root path
run: |
echo "=== Testing nginx reverse proxy at root path ==="
curl --retry-connrefused --retry 6 -s http://localhost:8080/ > /tmp/nginx-test-root.html

# Check for changedetection.io UI elements
if grep -q "checkbox-uuid" /tmp/nginx-test-root.html; then
echo "✓ Found checkbox-uuid in response"
else
echo "ERROR: checkbox-uuid not found in response"
cat /tmp/nginx-test-root.html
exit 1
fi

# Check for watchlist content
if grep -q -i "watch" /tmp/nginx-test-root.html; then
echo "✓ Found watch/watchlist content in response"
else
echo "ERROR: watchlist content not found"
cat /tmp/nginx-test-root.html
exit 1
fi

echo "✓ Root path reverse proxy working correctly"

- name: Test reverse proxy - subpath with X-Forwarded-Prefix
run: |
echo "=== Testing nginx reverse proxy at subpath /changedet-sub/ ==="
curl --retry-connrefused --retry 6 -s http://localhost:8080/changedet-sub/ > /tmp/nginx-test-subpath.html

# Check for changedetection.io UI elements
if grep -q "checkbox-uuid" /tmp/nginx-test-subpath.html; then
echo "✓ Found checkbox-uuid in subpath response"
else
echo "ERROR: checkbox-uuid not found in subpath response"
cat /tmp/nginx-test-subpath.html
exit 1
fi

echo "✓ Subpath reverse proxy working correctly"

- name: Test API through reverse proxy subpath
run: |
echo "=== Testing API endpoints through nginx subpath /changedet-sub/ ==="

# Extract API key from the changedetection.io datastore
API_KEY=$(docker exec changedet-app cat /datastore/changedetection.json | grep -o '"api_access_token": *"[^"]*"' | cut -d'"' -f4)

if [ -z "$API_KEY" ]; then
echo "ERROR: Could not extract API key from datastore"
docker exec changedet-app cat /datastore/changedetection.json
exit 1
fi

echo "✓ Extracted API key: ${API_KEY:0:8}..."

# Create a watch via API through nginx proxy subpath
echo "Creating watch via POST to /changedet-sub/api/v1/watch"
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/changedet-sub/api/v1/watch" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/test-nginx-proxy",
"tag": "nginx-test"
}')

HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)

if [ "$HTTP_CODE" != "201" ]; then
echo "ERROR: Expected HTTP 201, got $HTTP_CODE"
echo "Response: $BODY"
exit 1
fi

echo "✓ Watch created successfully (HTTP 201)"

# Extract the watch UUID from response
WATCH_UUID=$(echo "$BODY" | grep -o '"uuid": *"[^"]*"' | cut -d'"' -f4)
echo "✓ Watch UUID: $WATCH_UUID"

# Update the watch via PUT through nginx proxy subpath
echo "Updating watch via PUT to /changedet-sub/api/v1/watch/${WATCH_UUID}"
RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
-d '{
"paused": true
}')

HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)

if [ "$HTTP_CODE" != "200" ]; then
echo "ERROR: Expected HTTP 200, got $HTTP_CODE"
echo "Response: $BODY"
exit 1
fi

if echo "$BODY" | grep -q 'OK'; then
echo "✓ Watch updated successfully (HTTP 200, response: OK)"
else
echo "ERROR: Expected response 'OK', got: $BODY"
echo "Response: $BODY"
exit 1
fi

# Verify the watch is paused via GET
echo "Verifying watch is paused via GET"
RESPONSE=$(curl -s "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
-H "x-api-key: ${API_KEY}")

if echo "$RESPONSE" | grep -q '"paused": *true'; then
echo "✓ Watch is paused as expected"
else
echo "ERROR: Watch paused state not confirmed"
echo "Response: $RESPONSE"
exit 1
fi

echo "✓ API tests through nginx subpath completed successfully"

- name: Cleanup nginx test
if: always()
run: |
docker logs nginx-proxy || true
docker logs changedet-app || true
docker stop nginx-proxy changedet-app || true
docker rm nginx-proxy changedet-app || true

# Proxy tests
proxy-tests:
runs-on: ubuntu-latest

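For reference, the API calls exercised in the workflow above translate to a few lines of Python. This is only an illustration of the same /changedet-sub prefix handling, not part of the diff: the host, port and prefix come from the test setup above, and API_KEY is assumed to be obtained the same way as in the workflow step.

```python
import requests

BASE = "http://localhost:8080/changedet-sub"      # nginx proxy subpath from the test above
HEADERS = {"x-api-key": API_KEY}                  # API_KEY extracted as in the workflow step

# Create a watch through the proxied subpath
r = requests.post(f"{BASE}/api/v1/watch", headers=HEADERS,
                  json={"url": "https://example.com/test-nginx-proxy", "tag": "nginx-test"})
assert r.status_code == 201
watch_uuid = r.json()["uuid"]

# Pause it, then confirm the new state through the same prefix
assert requests.put(f"{BASE}/api/v1/watch/{watch_uuid}", headers=HEADERS,
                    json={"paused": True}).status_code == 200
assert requests.get(f"{BASE}/api/v1/watch/{watch_uuid}", headers=HEADERS).json()["paused"] is True
```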
@@ -2,7 +2,7 @@

# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.53.1'
__version__ = '0.53.4'

from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

@@ -610,7 +610,7 @@ def main():

@app.context_processor
def inject_template_globals():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
return dict(right_sticky="v"+__version__,
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False,
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),

@@ -97,17 +97,6 @@ class Tag(Resource):
# Delete the tag, and any tag reference
del self.datastore.data['settings']['application']['tags'][uuid]

# Delete tag.json file if it exists
import os
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")

# Remove tag from all watches
for watch_uuid, watch in self.datastore.data['watching'].items():
if watch.get('tags') and uuid in watch['tags']:

@@ -103,6 +103,7 @@ def validate_openapi_request(operation_id):
if request.method.upper() != 'GET':
# Lazy import - only loaded when actually validating a request
from openapi_core.contrib.flask import FlaskOpenAPIRequest
from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError

spec = get_openapi_spec()
openapi_request = FlaskOpenAPIRequest(request)
@@ -110,6 +111,16 @@ def validate_openapi_request(operation_id):
if result.errors:
error_details = []
for error in result.errors:
# Skip path/server validation errors for reverse proxy compatibility
# Flask routing already validates that endpoints exist (returns 404 if not).
# OpenAPI validation here is primarily for request body schema validation.
# When behind nginx/reverse proxy, URLs may have path prefixes that don't
# match the OpenAPI server definitions, causing false positives.
if isinstance(error, PathError):
logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
continue

error_str = str(error)
# Extract detailed schema errors from __cause__
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
for schema_error in error.__cause__.schema_errors:
@@ -117,9 +128,12 @@ def validate_openapi_request(operation_id):
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
error_details.append(f"{field}: {msg}")
else:
error_details.append(str(error))
error_details.append(error_str)

# Only raise if we have actual validation errors (not path/server issues)
if error_details:
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
except BadRequest:
# Re-raise BadRequest exceptions (validation failures)
raise

@@ -13,7 +13,7 @@ from loguru import logger
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"


def create_backup(datastore_path, watches: dict):
def create_backup(datastore_path, watches: dict, tags: dict = None):
logger.debug("Creating backup...")
import zipfile
from pathlib import Path
@@ -45,6 +45,15 @@ def create_backup(datastore_path, watches: dict):
if os.path.isfile(secret_file):
zipObj.write(secret_file, arcname="secret.txt")

# Add tag data directories (each tag has its own {uuid}/tag.json)
for uuid, tag in (tags or {}).items():
for f in Path(tag.data_dir).glob('*'):
zipObj.write(f,
arcname=os.path.join(f.parts[-2], f.parts[-1]),
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8)
logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")

# Add any data in the watch data directory.
for uuid, w in watches.items():
for f in Path(w.data_dir).glob('*'):
@@ -88,7 +97,10 @@ def create_backup(datastore_path, watches: dict):


def construct_blueprint(datastore: ChangeDetectionStore):
from .restore import construct_restore_blueprint

backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
backup_threads = []

@login_optionally_required
@@ -96,16 +108,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
def request_backup():
if any(thread.is_alive() for thread in backup_threads):
flash(gettext("A backup is already running, check back in a few minutes"), "error")
return redirect(url_for('backups.index'))
return redirect(url_for('backups.create'))

if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
flash(gettext("Maximum number of backups reached, please remove some"), "error")
return redirect(url_for('backups.index'))
return redirect(url_for('backups.create'))

# With immediate persistence, all data is already saved
zip_thread = threading.Thread(
target=create_backup,
args=(datastore.datastore_path, datastore.data.get("watching")),
kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
daemon=True,
name="BackupCreator"
)
@@ -113,7 +126,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
backup_threads.append(zip_thread)
flash(gettext("Backup building in background, check back in a few minutes."))

return redirect(url_for('backups.index'))
return redirect(url_for('backups.create'))

def find_backups():
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
@@ -155,14 +168,14 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)

@login_optionally_required
@backups_blueprint.route("", methods=['GET'])
def index():
@backups_blueprint.route("/", methods=['GET'])
@backups_blueprint.route("/create", methods=['GET'])
def create():
backups = find_backups()
output = render_template("overview.html",
output = render_template("backup_create.html",
available_backups=backups,
backup_running=any(thread.is_alive() for thread in backup_threads)
)

return output

@login_optionally_required
@@ -176,6 +189,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):

flash(gettext("Backups were deleted."))

return redirect(url_for('backups.index'))
return redirect(url_for('backups.create'))

return backups_blueprint

changedetectionio/blueprint/backups/restore.py (new file, 208 lines)
@@ -0,0 +1,208 @@
import io
import json
import os
import shutil
import tempfile
import threading
import zipfile

from flask import Blueprint, render_template, flash, url_for, redirect, request
from flask_babel import gettext, lazy_gettext as _l
from wtforms import Form, BooleanField, SubmitField
from flask_wtf.file import FileField, FileAllowed
from loguru import logger

from changedetectionio.flask_app import login_optionally_required


class RestoreForm(Form):
zip_file = FileField(_l('Backup zip file'), validators=[
FileAllowed(['zip'], _l('Must be a .zip backup file!'))
])
include_groups = BooleanField(_l('Include groups'), default=True)
include_groups_replace_existing = BooleanField(_l('Replace existing groups of the same UUID'), default=True)
include_watches = BooleanField(_l('Include watches'), default=True)
include_watches_replace_existing = BooleanField(_l('Replace existing watches of the same UUID'), default=True)
submit = SubmitField(_l('Restore backup'))


def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace, include_watches, include_watches_replace):
"""
Extract and import watches and groups from a backup zip stream.

Mirrors the store's _load_watches / _load_tags loading pattern:
- UUID dirs with tag.json → Tag.model + tag_obj.commit()
- UUID dirs with watch.json → rehydrate_entity + watch_obj.commit()

Returns a dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.
Raises zipfile.BadZipFile if the stream is not a valid zip.
"""
from changedetectionio.model import Tag

restored_groups = 0
skipped_groups = 0
restored_watches = 0
skipped_watches = 0

current_tags = datastore.data['settings']['application'].get('tags', {})
current_watches = datastore.data['watching']

with tempfile.TemporaryDirectory() as tmpdir:
logger.debug(f"Restore: extracting zip to {tmpdir}")
with zipfile.ZipFile(zip_stream, 'r') as zf:
zf.extractall(tmpdir)
logger.debug("Restore: zip extracted, scanning UUID directories")

for entry in os.scandir(tmpdir):
if not entry.is_dir():
continue

uuid = entry.name
tag_json_path = os.path.join(entry.path, 'tag.json')
watch_json_path = os.path.join(entry.path, 'watch.json')

# --- Tags (groups) ---
if include_groups and os.path.exists(tag_json_path):
if uuid in current_tags and not include_groups_replace:
logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
skipped_groups += 1
continue

try:
with open(tag_json_path, 'r', encoding='utf-8') as f:
tag_data = json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.error(f"Restore: failed to read tag.json for {uuid}: {e}")
continue

title = tag_data.get('title', uuid)
logger.debug(f"Restore: importing group '{title}' ({uuid})")

# Mirror _load_tags: set uuid and force processor
tag_data['uuid'] = uuid
tag_data['processor'] = 'restock_diff'

# Copy the UUID directory so data_dir exists for commit()
dst_dir = os.path.join(datastore.datastore_path, uuid)
if os.path.exists(dst_dir):
shutil.rmtree(dst_dir)
shutil.copytree(entry.path, dst_dir)

tag_obj = Tag.model(
datastore_path=datastore.datastore_path,
__datastore=datastore.data,
default=tag_data
)
current_tags[uuid] = tag_obj
tag_obj.commit()
restored_groups += 1
logger.success(f"Restore: group '{title}' ({uuid}) restored")

# --- Watches ---
elif include_watches and os.path.exists(watch_json_path):
if uuid in current_watches and not include_watches_replace:
logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
skipped_watches += 1
continue

try:
with open(watch_json_path, 'r', encoding='utf-8') as f:
watch_data = json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.error(f"Restore: failed to read watch.json for {uuid}: {e}")
continue

url = watch_data.get('url', uuid)
logger.debug(f"Restore: importing watch '{url}' ({uuid})")

# Copy UUID directory first so data_dir and history files exist
dst_dir = os.path.join(datastore.datastore_path, uuid)
if os.path.exists(dst_dir):
shutil.rmtree(dst_dir)
shutil.copytree(entry.path, dst_dir)

# Mirror _load_watches / rehydrate_entity
watch_data['uuid'] = uuid
watch_obj = datastore.rehydrate_entity(uuid, watch_data)
current_watches[uuid] = watch_obj
watch_obj.commit()
restored_watches += 1
logger.success(f"Restore: watch '{url}' ({uuid}) restored")

logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
f"watches {restored_watches} restored / {skipped_watches} skipped")

# Persist changedetection.json (includes the updated tags dict)
logger.debug("Restore: committing datastore settings")
datastore.commit()

return {
'restored_groups': restored_groups,
'skipped_groups': skipped_groups,
'restored_watches': restored_watches,
'skipped_watches': skipped_watches,
}


def construct_restore_blueprint(datastore):
restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
restore_threads = []

@login_optionally_required
@restore_blueprint.route("/restore", methods=['GET'])
def restore():
form = RestoreForm()
return render_template("backup_restore.html",
form=form,
restore_running=any(t.is_alive() for t in restore_threads))

@login_optionally_required
@restore_blueprint.route("/restore/start", methods=['POST'])
def backups_restore_start():
if any(t.is_alive() for t in restore_threads):
flash(gettext("A restore is already running, check back in a few minutes"), "error")
return redirect(url_for('backups.restore.restore'))

zip_file = request.files.get('zip_file')
if not zip_file or not zip_file.filename:
flash(gettext("No file uploaded"), "error")
return redirect(url_for('backups.restore.restore'))

if not zip_file.filename.lower().endswith('.zip'):
flash(gettext("File must be a .zip backup file"), "error")
return redirect(url_for('backups.restore.restore'))

# Read into memory now — the request stream is gone once we return
try:
zip_bytes = io.BytesIO(zip_file.read())
zipfile.ZipFile(zip_bytes) # quick validity check before spawning
zip_bytes.seek(0)
except zipfile.BadZipFile:
flash(gettext("Invalid or corrupted zip file"), "error")
return redirect(url_for('backups.restore.restore'))

include_groups = request.form.get('include_groups') == 'y'
include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
include_watches = request.form.get('include_watches') == 'y'
include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'

restore_thread = threading.Thread(
target=import_from_zip,
kwargs={
'zip_stream': zip_bytes,
'datastore': datastore,
'include_groups': include_groups,
'include_groups_replace': include_groups_replace,
'include_watches': include_watches,
'include_watches_replace': include_watches_replace,
},
daemon=True,
name="BackupRestore"
)
restore_thread.start()
restore_threads.append(restore_thread)
flash(gettext("Restore started in background, check back in a few minutes."))
return redirect(url_for('backups.restore.restore'))

return restore_blueprint
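The restore logic above is driven from the blueprint's upload form, but import_from_zip() itself has no Flask dependency. A rough sketch of calling it directly, assuming an already-initialised ChangeDetectionStore instance named `datastore` and a zip produced by create_backup() (the filename here is only an example):

```python
from changedetectionio.blueprint.backups.restore import import_from_zip

# `datastore` is assumed to be an already-initialised ChangeDetectionStore.
with open("changedetection-backup-1700000000.zip", "rb") as f:
    counts = import_from_zip(
        zip_stream=f,
        datastore=datastore,
        include_groups=True,
        include_groups_replace=True,
        include_watches=True,
        include_watches_replace=False,   # keep any existing watch with the same UUID
    )

# e.g. {'restored_groups': 2, 'skipped_groups': 0, 'restored_watches': 14, 'skipped_watches': 3}
print(counts)
```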
@@ -0,0 +1,49 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field %}

<div class="edit-form">
<div class="tabs collapsable">
<ul>
<li class="tab active" id=""><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
<li class="tab"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
</ul>
</div>
<div class="box-wrap inner">
<div id="general">
{% if backup_running %}
<p>
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
</p>
{% endif %}

<p>
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
</p>
<br>
{% if available_backups %}
<ul>
{% for backup in available_backups %}
<li>
<a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}
</li>
{% endfor %}
</ul>
{% else %}
<p>
<strong>{{ _('No backups found.') }}</strong>
</p>
{% endif %}

<a class="pure-button pure-button-primary"
href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
{% if available_backups %}
<a class="pure-button button-small button-error "
href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
{% endif %}

</div>

</div>
</div>
{% endblock %}

@@ -0,0 +1,58 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field %}

<div class="edit-form">
<div class="tabs collapsable">
<ul>
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
<li class="tab active"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
</ul>
</div>
<div class="box-wrap inner">
<div id="general">
{% if restore_running %}
<p>
<span class="spinner"></span> <strong>{{ _('A restore is running!') }}</strong>
</p>
{% endif %}

<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>

<form class="pure-form pure-form-stacked settings"
action="{{ url_for('backups.restore.backups_restore_start') }}"
method="POST"
enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">

<div class="pure-control-group">
{{ render_checkbox_field(form.include_groups) }}
<span class="pure-form-message-inline">{{ _('Include all groups found in backup?') }}</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.include_groups_replace_existing) }}
<span class="pure-form-message-inline">{{ _('Replace any existing groups of the same UUID?') }}</span>
</div>

<div class="pure-control-group">
{{ render_checkbox_field(form.include_watches) }}
<span class="pure-form-message-inline">{{ _('Include all watches found in backup?') }}</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.include_watches_replace_existing) }}
<span class="pure-form-message-inline">{{ _('Replace any existing watches of the same UUID?') }}</span>
</div>

<div class="pure-control-group">
{{ render_field(form.zip_file) }}
</div>

<div class="pure-controls">
<button type="submit" class="pure-button pure-button-primary">{{ _('Restore backup') }}</button>
</div>
</form>
</div>
</div>
</div>
{% endblock %}

@@ -1,36 +0,0 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field %}
<div class="edit-form">
<div class="box-wrap inner">
<h2>{{ _('Backups') }}</h2>
{% if backup_running %}
<p>
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
</p>
{% endif %}
<p>
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
</p>
<br>
{% if available_backups %}
<ul>
{% for backup in available_backups %}
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
{% endfor %}
</ul>
{% else %}
<p>
<strong>{{ _('No backups found.') }}</strong>
</p>
{% endif %}

<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
{% if available_backups %}
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
{% endif %}
</div>
</div>

{% endblock %}

@@ -16,6 +16,11 @@
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="tab-pane-inner" id="url-list">

<p>
{{ _('Restoring changedetection.io backups is in the') }}<a href="{{ url_for('backups.restore.restore') }}"> {{ _('backups section') }}</a>.
<br>
</p>
<div class="pure-control-group">
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
<br>
@@ -37,9 +42,6 @@
</div>

<div class="tab-pane-inner" id="distill-io">

<div class="pure-control-group">
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
@@ -49,8 +51,6 @@
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
</p>
</div>

<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
font-family:monospace;
white-space: pre;
@@ -114,6 +114,7 @@
</div>
</div>
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>

</form>

</div>

@@ -25,7 +25,7 @@
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
{% if plugin_tabs %}

@@ -70,17 +70,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if datastore.data['settings']['application']['tags'].get(uuid):
del datastore.data['settings']['application']['tags'][uuid]

# Delete tag.json file if it exists
import os
tag_dir = os.path.join(datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")

# Remove tag from all watches in background thread to avoid blocking
def remove_tag_background(tag_uuid):
"""Background thread to remove tag from watches - discarded after completion."""
@@ -127,19 +116,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@tags_blueprint.route("/delete_all", methods=['GET'])
@login_optionally_required
def delete_all():
# Delete all tag.json files
import os
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")

# Clear all tags from settings immediately
datastore.data['settings']['application']['tags'] = {}
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
# TagsDict 'del' handler will remove the dir
del datastore.data['settings']['application']['tags'][tag_uuid]

# Clear tags from all watches in background thread to avoid blocking
def clear_all_tags_background():
@@ -255,7 +236,4 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return redirect(url_for('tags.tags_overview_page'))

@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
def form_tag_delete(uuid):
return redirect(url_for('tags.tags_overview_page'))
return tags_blueprint

@@ -194,9 +194,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
tag_limit = request.args.get('tag')
now = int(time.time())

# Mark watches as viewed in background thread to avoid blocking
def mark_viewed_background():
"""Background thread to mark watches as viewed - discarded after completion."""
# Mark watches as viewed - use background thread only for large watch counts
def mark_viewed_impl():
"""Mark watches as viewed - can run synchronously or in background thread."""
marked_count = 0
try:
for watch_uuid, watch in datastore.data['watching'].items():
@@ -209,15 +209,21 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
datastore.set_last_viewed(watch_uuid, now)
marked_count += 1

logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
logger.info(f"Marking complete: {marked_count} watches marked as viewed")
except Exception as e:
logger.error(f"Error in background mark as viewed: {e}")
logger.error(f"Error marking as viewed: {e}")

# Start background thread and return immediately
thread = threading.Thread(target=mark_viewed_background, daemon=True)
thread.start()
# For small watch counts (< 10), run synchronously to avoid race conditions in tests
# For larger counts, use background thread to avoid blocking the UI
watch_count = len(datastore.data['watching'])
if watch_count < 10:
# Run synchronously for small watch counts
mark_viewed_impl()
else:
# Start background thread for large watch counts
thread = threading.Thread(target=mark_viewed_impl, daemon=True)
thread.start()

flash(gettext("Marking watches as viewed in background..."))
return redirect(url_for('watchlist.index', tag=tag_limit))

@ui_blueprint.route("/delete", methods=['GET'])

@@ -304,12 +304,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
</span>
{%- endif -%}

{%- if watch.get('restock') and watch['restock']['price'] != None -%}
{%- if watch['restock']['price'] != None -%}
{%- if watch.get('restock') and watch['restock'].get('price') -%}
{%- if watch['restock']['price'] is number -%}
<span class="restock-label price" title="{{ _('Price') }}">
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
</span>
{%- endif -%}
{%- else -%} <!-- watch['restock']['price']' is not a number, cant output it -->
{%- endif -%}
{%- elif not watch.has_restock_info -%}
<span class="restock-label error">{{ _('No information') }}</span>
{%- endif -%}

@@ -86,8 +86,8 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
# better than scrollTo incase they override it in the page
await page.evaluate(
"""(y) => {
document.documentElement.scrollTop = y;
document.body.scrollTop = y;
const el = document.scrollingElement;
if (el) el.scrollTop = y;
}""",
y
)
@@ -305,6 +305,8 @@ class fetcher(Fetcher):
await asyncio.wait_for(self.browser.close(), timeout=3.0)
except Exception as cleanup_error:
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
finally:
self.browser = None
raise

# Add console handler to capture console.log from favicon fetcher
@@ -532,6 +534,14 @@ class fetcher(Fetcher):
)
except asyncio.TimeoutError:
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
finally:
# Internal cleanup on any exception/timeout - call quit() immediately
# This prevents connection leaks during exception bursts
# Worker.py's quit() call becomes a redundant safety net (idempotent)
try:
await self.quit(watch={'uuid': watch_uuid} if watch_uuid else None)
except Exception as cleanup_error:
logger.error(f"[{watch_uuid}] Error during internal quit() cleanup: {cleanup_error}")

# Plugin registration for built-in fetcher

@@ -565,6 +565,27 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
if is_rss:
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
html_content = re.sub(r'</title>', r'</h1>', html_content)
else:
# Strip bloat in one pass, SPA's often dump 10Mb+ into the <head> for styles, which is not needed
# Causing inscriptis to silently exit when more than ~10MB is found.
# All we are doing here is converting the HTML to text, no CSS layout etc
# Use backreference (\1) to ensure opening/closing tags match (prevents <style> matching </svg> in CSS data URIs)
html_content = re.sub(r'<(style|script|svg|noscript)[^>]*>.*?</\1>|<(?:link|meta)[^>]*/?>|<!--.*?-->',
'', html_content, flags=re.DOTALL | re.IGNORECASE)

# SPAs often use <body style="display:none"> to hide content until JS loads
# inscriptis respects CSS display rules, so we need to remove these hiding styles
# to extract the actual page content
body_style_pattern = r'(<body[^>]*)\s+style\s*=\s*["\']([^"\']*\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b[^"\']*)["\']'

# Check if body has hiding styles that need to be fixed
body_match = re.search(body_style_pattern, html_content, flags=re.IGNORECASE)
if body_match:
from loguru import logger
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{body_match.group(2)}')")

html_content = re.sub(body_style_pattern, r'\1', html_content, flags=re.IGNORECASE)

text_content = get_text(html_content, config=parser_config)
return text_content

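A quick illustration (not part of the diff) of what the new body-style stripping changes in practice; the import path is assumed from the html_tools usage elsewhere in this changeset, and the sample HTML is made up:

```python
from changedetectionio.html_tools import html_to_text

html = """
<html>
  <head><style>.hero { color: red }</style></head>
  <body style="display:none"><p>Price: 19.99 EUR</p></body>
</html>
"""

# Previously the display:none on <body> meant inscriptis extracted nothing useful;
# with the hiding style stripped, the page text is recovered.
print(html_to_text(html_content=html))   # expected: "Price: 19.99 EUR"
```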
@@ -2,6 +2,7 @@ from os import getenv
from copy import deepcopy

from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
from changedetectionio.model.Tags import TagsDict

from changedetectionio.notification import (
default_notification_body,
@@ -68,7 +69,7 @@ class model(dict):
'schema_version' : 0,
'shared_diff_access': False,
'strip_ignored_lines': False,
'tags': {}, #@todo use Tag.model initialisers
'tags': None, # Initialized in __init__ with real datastore_path
'webdriver_delay': None , # Extra delay in seconds before extracting text
'ui': {
'use_page_title_in_list': True,
@@ -80,10 +81,16 @@ class model(dict):
}
}

def __init__(self, *arg, **kw):
def __init__(self, *arg, datastore_path=None, **kw):
super(model, self).__init__(*arg, **kw)
# Capture any tags data passed in before base_config overwrites the structure
existing_tags = self.get('settings', {}).get('application', {}).get('tags') or {}
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
self.update(deepcopy(self.base_config))
# TagsDict requires the real datastore_path at runtime (cannot be set at class-definition time)
if datastore_path is None:
raise ValueError("App.model() requires 'datastore_path' keyword argument")
self['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=datastore_path)


def parse_headers_from_text_file(filepath):

changedetectionio/model/Tags.py (new file, 39 lines)
@@ -0,0 +1,39 @@
import os
import shutil
from pathlib import Path
from loguru import logger

_SENTINEL = object()


class TagsDict(dict):
"""Dict subclass that removes the corresponding tag.json file when a tag is deleted."""

def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
self._datastore_path = Path(datastore_path)
super().__init__(*args, **kwargs)

def __delitem__(self, key: str) -> None:
super().__delitem__(key)
tag_dir = self._datastore_path / key
tag_json_file = tag_dir / "tag.json"
if not os.path.exists(tag_json_file):
logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")
return
try:
shutil.rmtree(tag_dir)
logger.info(f"Deleted tag directory for tag {key!r}")
except FileNotFoundError:
pass
except OSError as e:
logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")

def pop(self, key: str, default=_SENTINEL):
"""Remove and return tag, deleting its tag.json file. Raises KeyError if missing and no default given."""
if key in self:
value = self[key]
del self[key]
return value
if default is _SENTINEL:
raise KeyError(key)
return default
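A minimal sketch of the cleanup behaviour TagsDict adds (illustration only; the UUID and file contents are made up):

```python
import json
import tempfile
from pathlib import Path
from changedetectionio.model.Tags import TagsDict

with tempfile.TemporaryDirectory() as datastore_path:
    uuid = "11111111-2222-3333-4444-555555555555"
    tag_dir = Path(datastore_path) / uuid
    tag_dir.mkdir()
    (tag_dir / "tag.json").write_text(json.dumps({"title": "example group"}))

    tags = TagsDict({uuid: {"title": "example group"}}, datastore_path=datastore_path)

    del tags[uuid]                                  # removes the key AND deletes <datastore>/<uuid>/ from disk
    assert not tag_dir.exists()
    assert tags.pop("missing-uuid", None) is None   # pop() honours a default, like dict.pop()
```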
@@ -129,6 +129,51 @@ class ChangeDetectionSpec:
"""
pass

@hookspec
def update_handler_alter(update_handler, watch, datastore):
"""Modify or wrap the update_handler before it processes a watch.

This hook is called after the update_handler (perform_site_check instance) is created
but before it calls call_browser() and run_changedetection(). Plugins can use this to:
- Wrap the handler to add logging/metrics
- Modify handler configuration
- Add custom preprocessing logic

Args:
update_handler: The perform_site_check instance that will process the watch
watch: The watch dict being processed
datastore: The application datastore

Returns:
object or None: Return a modified/wrapped handler, or None to keep the original.
If multiple plugins return handlers, they are chained in registration order.
"""
pass

@hookspec
def update_finalize(update_handler, watch, datastore, processing_exception):
"""Called after watch processing completes (success or failure).

This hook is called in the finally block after all processing is complete,
allowing plugins to perform cleanup, update metrics, or log final status.

The plugin can access update_handler.last_logging_insert_id if it was stored
during update_handler_alter, and use processing_exception to determine if
the processing succeeded or failed.

Args:
update_handler: The perform_site_check instance (may be None if creation failed)
watch: The watch dict that was processed (may be None if not loaded)
datastore: The application datastore
processing_exception: The exception from the main processing block, or None if successful.
This does NOT include cleanup exceptions - only exceptions from
the actual watch processing (fetch, diff, etc).

Returns:
None: This hook doesn't return a value
"""
pass


# Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)

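For orientation, a hypothetical plugin implementing the two new hooks might look like the sketch below. It is not part of this changeset, and the hookimpl namespace string is an assumption (it must match the project's PLUGIN_NAMESPACE above):

```python
import time
import pluggy

# Assumed namespace - must match PLUGIN_NAMESPACE used by the plugin manager above
hookimpl = pluggy.HookimplMarker("changedetectionio")


class TimingPlugin:
    @hookimpl
    def update_handler_alter(self, update_handler, watch, datastore):
        # Stash a start time on the handler; returning None keeps the original handler.
        update_handler._timing_started = time.time()
        return None

    @hookimpl
    def update_finalize(self, update_handler, watch, datastore, processing_exception):
        started = getattr(update_handler, "_timing_started", None) if update_handler else None
        if started is None:
            return
        status = "failed" if processing_exception else "ok"
        uuid = watch.get("uuid") if watch else "?"
        print(f"watch {uuid}: {status} after {time.time() - started:.1f}s")
```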
@@ -499,4 +544,66 @@ def get_plugin_template_paths():
template_paths.append(templates_dir)
logger.debug(f"Added plugin template path: {templates_dir}")

return template_paths
return template_paths


def apply_update_handler_alter(update_handler, watch, datastore):
"""Apply update_handler_alter hooks from all plugins.

Allows plugins to wrap or modify the update_handler before it processes a watch.
Multiple plugins can chain modifications - each plugin receives the result from
the previous plugin.

Args:
update_handler: The perform_site_check instance to potentially modify
watch: The watch dict being processed
datastore: The application datastore

Returns:
object: The (potentially modified/wrapped) update_handler
"""
# Get all plugins that implement the update_handler_alter hook
results = plugin_manager.hook.update_handler_alter(
update_handler=update_handler,
watch=watch,
datastore=datastore
)

# Chain results - each plugin gets the result from the previous one
current_handler = update_handler
if results:
for result in results:
if result is not None:
logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
current_handler = result

return current_handler


def apply_update_finalize(update_handler, watch, datastore, processing_exception):
"""Apply update_finalize hooks from all plugins.

Called in the finally block after watch processing completes, allowing plugins
to perform cleanup, update metrics, or log final status.

Args:
update_handler: The perform_site_check instance (may be None)
watch: The watch dict that was processed (may be None)
datastore: The application datastore
processing_exception: The exception from processing, or None if successful

Returns:
None
"""
try:
# Call all plugins that implement the update_finalize hook
plugin_manager.hook.update_finalize(
update_handler=update_handler,
watch=watch,
datastore=datastore,
processing_exception=processing_exception
)
except Exception as e:
# Don't let plugin errors crash the worker
logger.error(f"Error in update_finalize hook: {e}")
logger.exception(f"update_finalize hook exception details:")

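The worker-side wiring is not shown in this diff; an assumed call pattern for these two helpers, based on the hookspec docstrings above, would be roughly:

```python
# Assumed usage sketch (worker.py is not part of this diff; method arguments are illustrative).
processing_exception = None
try:
    # Let plugins wrap or modify the handler before the check runs
    update_handler = apply_update_handler_alter(update_handler, watch, datastore)
    update_handler.call_browser()
    update_handler.run_changedetection(watch)
except Exception as e:
    processing_exception = e
    raise
finally:
    # Always give plugins a chance to log or clean up, whether the check succeeded or not
    apply_update_finalize(update_handler, watch, datastore, processing_exception)
```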
@@ -347,6 +347,7 @@ class ContentProcessor:
def extract_text_from_html(self, html_content, stream_content_type):
"""Convert HTML to plain text."""
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)

return html_tools.html_to_text(
html_content=html_content,
render_anchor_tag_content=do_anchor,

@@ -102,7 +102,9 @@
}

// Navigate to search results (always redirect to watchlist home)
window.location.href = '/?' + params.toString();
// Use base_path if available (for sub-path deployments like /enlighten-richerx)
const basePath = typeof base_path !== 'undefined' ? base_path : '';
window.location.href = basePath + '/?' + params.toString();
});
}
});

@@ -1 +1 @@
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces;overflow-wrap:anywhere}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
@@ -62,6 +62,7 @@ body.difference-page {

pre {
white-space: break-spaces;
overflow-wrap: anywhere;
}

File diff suppressed because one or more lines are too long
@@ -22,6 +22,8 @@ import uuid as uuid_builder
from loguru import logger
from blinker import signal

from ..model.Tags import TagsDict

# Try to import orjson for faster JSON serialization
try:
import orjson
@@ -121,6 +123,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if 'application' in settings_data['settings']:
self.__data['settings']['application'].update(settings_data['settings']['application'])

# Use our Tags dict with cleanup helpers etc
# @todo Same for Watches
existing_tags = settings_data.get('settings', {}).get('application', {}).get('tags') or {}
self.__data['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=self.datastore_path)

# More or less for the old format which had this data in the one url-watches.json
# cant hurt to leave it here,
if 'watching' in settings_data:
@@ -196,7 +203,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.datastore_path = datastore_path

# Initialize data structure
self.__data = App.model()
self.__data = App.model(datastore_path=datastore_path)
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")

# Base definition for all watchers (deepcopy part of #569)
@@ -355,6 +362,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Deep copy settings to avoid modifying the original
settings_copy = copy.deepcopy(self.__data['settings'])

# Is saved as {uuid}/tag.json
settings_copy['application']['tags'] = {}

return {
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
'app_guid': self.__data.get('app_guid'),

@@ -669,7 +669,9 @@ class DatastoreUpdatesMixin:
def update_26(self):
self.migrate_legacy_db_format()

def update_28(self):
# Re-run tag to JSON migration
def update_29(self):

"""
Migrate tags to individual tag.json files.

@@ -682,8 +684,6 @@ class DatastoreUpdatesMixin:
- Enables independent tag versioning/backup
- Maintains backwards compatibility (tags stay in settings too)
"""
# Force save as tag.json (not watch.json) even if object is corrupted

logger.critical("=" * 80)
logger.critical("Running migration: Individual tag persistence (update_28)")
logger.critical("Creating individual tag.json files")
@@ -702,6 +702,9 @@ class DatastoreUpdatesMixin:
failed_count = 0

for uuid, tag_data in tags.items():
if os.path.isfile(os.path.join(self.datastore_path, uuid, "tag.json")):
logger.debug(f"Tag {uuid} tag.json exists, skipping")
continue
try:
tag_data.commit()
saved_count += 1
@@ -723,3 +726,7 @@ class DatastoreUpdatesMixin:
logger.info("Future tag edits will update both locations (dual storage)")
logger.critical("=" * 80)

# write it to disk, it will be saved without ['tags'] in the JSON db because we find it from disk glob
# (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
self._save_settings()

@@ -10,6 +10,7 @@
<li>{{ _('Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this monitor') }}</li>
<li>{{ _('Each line is processed separately (think of each line as "OR")') }}</li>
<li>{{ _('Note: Wrap in forward slash / to use regex example:') }} <code>/foo\d/</code></li>
<li>{{ _('You can also use')}} <a href="#conditions">{{ _('conditions')}}</a> - {{ _('"Page text" - with Contains, Starts With, Not Contains and many more' ) }} <code>/foo\d/</code></li>
</ul>
</span>
</div>

@@ -6,11 +6,10 @@ import io
from zipfile import ZipFile
import re
import time
from changedetectionio.model import Watch, Tag


def test_backup(client, live_server, measure_memory_usage, datastore_path):
# live_server_setup(live_server) # Setup on conftest per function

set_original_response(datastore_path=datastore_path)


@@ -32,7 +31,7 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
time.sleep(4)

res = client.get(
url_for("backups.index"),
url_for("backups.create"),
follow_redirects=True
)
# Can see the download link to the backup
@@ -80,11 +79,12 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):

def test_watch_data_package_download(client, live_server, measure_memory_usage, datastore_path):
"""Test downloading a single watch's data as a zip package"""
import os

set_original_response(datastore_path=datastore_path)

uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
tag_uuid = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag")
tag_uuid2 = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag number two")
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

wait_for_all_checks(client)
@@ -113,4 +113,87 @@ def test_watch_data_package_download(client, live_server, measure_memory_usage,
# Should contain history/snapshot files
uuid4hex_txt = re.compile(f'^{re.escape(uuid)}/.*\\.txt', re.I)
txt_files = list(filter(uuid4hex_txt.match, files))
assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"
assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"


def test_backup_restore(client, live_server, measure_memory_usage, datastore_path):
"""Test that a full backup zip can be restored — watches and tags survive a round-trip."""

set_original_response(datastore_path=datastore_path)

datastore = live_server.app.config['DATASTORE']
watch_url = url_for('test_endpoint', _external=True)

# Set up: one watch and two tags
uuid = datastore.add_watch(url=watch_url)
tag_uuid = datastore.add_tag(title="Tasty backup tag")
tag_uuid2 = datastore.add_tag(title="Tasty backup tag number two")

client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)

# Create a full backup
client.get(url_for("backups.request_backup"), follow_redirects=True)
time.sleep(4)

# Download the latest backup zip
res = client.get(url_for("backups.download_backup", filename="latest"), follow_redirects=True)
assert res.content_type == "application/zip"
zip_data = res.data

# Confirm the zip contains both watch.json and tag.json entries
backup = ZipFile(io.BytesIO(zip_data))
names = backup.namelist()
assert f"{uuid}/watch.json" in names, f"watch.json missing from backup: {names}"
assert f"{tag_uuid}/tag.json" in names, f"tag.json for tag 1 missing from backup: {names}"
assert f"{tag_uuid2}/tag.json" in names, f"tag.json for tag 2 missing from backup: {names}"

# --- Wipe everything ---
datastore.delete('all')
client.get(url_for("tags.delete_all"), follow_redirects=True)

assert uuid not in datastore.data['watching'], "Watch should be gone after delete"
assert tag_uuid not in datastore.data['settings']['application']['tags'], "Tag 1 should be gone after delete"
assert tag_uuid2 not in datastore.data['settings']['application']['tags'], "Tag 2 should be gone after delete"

# --- Restore from the backup zip ---
res = client.post(
url_for("backups.restore.backups_restore_start"),
data={
'zip_file': (io.BytesIO(zip_data), 'backup.zip'),
'include_groups': 'y',
'include_groups_replace_existing': 'y',
'include_watches': 'y',
'include_watches_replace_existing': 'y',
},
content_type='multipart/form-data',
follow_redirects=True
)
assert res.status_code == 200

# Wait for the thread to finish
time.sleep(2)

# --- Watch checks ---
restored_watch = datastore.data['watching'].get(uuid)
assert restored_watch is not None, f"Watch {uuid} not found after restore"
assert restored_watch['url'] == watch_url, "Restored watch URL does not match"
assert isinstance(restored_watch, Watch.model), \
f"Watch not properly rehydrated, got {type(restored_watch)}"
assert restored_watch.history_n >= 1, \
f"Restored watch should have at least 1 history entry, got {restored_watch.history_n}"

# --- Tag checks ---
restored_tags = datastore.data['settings']['application']['tags']

restored_tag = restored_tags.get(tag_uuid)
assert restored_tag is not None, f"Tag {tag_uuid} not found after restore"
assert restored_tag['title'] == "Tasty backup tag", "Restored tag 1 title does not match"
assert isinstance(restored_tag, Tag.model), \
f"Tag 1 not properly rehydrated, got {type(restored_tag)}"

restored_tag2 = restored_tags.get(tag_uuid2)
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
assert isinstance(restored_tag2, Tag.model), \
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
@@ -40,6 +40,7 @@ def test_favicon(client, live_server, measure_memory_usage, datastore_path):
res = client.get(url_for('static_content', group='..', filename='__init__.py'))
assert res.status_code != 200

res = client.get(url_for('static_content', group='.', filename='../__init__.py'))
assert res.status_code != 200

@@ -199,6 +199,259 @@ class TestHtmlToText(unittest.TestCase):

print(f"✓ Basic thread-safety test passed: {len(results)} threads, no errors")

def test_large_html_with_bloated_head(self):
"""
Test that html_to_text can handle large HTML documents with massive <head> bloat.

SPAs often dump 10MB+ of styles, scripts, and other bloat into the <head> section.
This can cause inscriptis to silently exit when processing very large documents.
The fix strips <style>, <script>, <svg>, <noscript>, <link>, <meta>, and HTML comments
before processing, allowing extraction of actual body content.
"""
# Generate massive style block (~5MB)
large_style = '<style>' + '.class{color:red;}\n' * 200000 + '</style>\n'

# Generate massive script block (~5MB)
large_script = '<script>' + 'console.log("bloat");\n' * 200000 + '</script>\n'

# Generate lots of SVG bloat (~3MB)
svg_bloat = '<svg><path d="M0,0 L100,100"/></svg>\n' * 50000

# Generate meta/link tags (~2MB)
meta_bloat = '<meta name="description" content="bloat"/>\n' * 50000
link_bloat = '<link rel="stylesheet" href="bloat.css"/>\n' * 50000

# Generate HTML comments (~1MB)
comment_bloat = '<!-- This is bloat -->\n' * 50000

# Generate noscript bloat
noscript_bloat = '<noscript>Enable JavaScript</noscript>\n' * 10000

# Build the large HTML document
html = f'''<!DOCTYPE html>
<html>
<head>
<title>Test Page</title>
{large_style}
{large_script}
{svg_bloat}
{meta_bloat}
{link_bloat}
{comment_bloat}
{noscript_bloat}
</head>
<body>
<h1>Important Heading</h1>
<p>This is the actual content that should be extracted.</p>
<div>
<p>First paragraph with meaningful text.</p>
<p>Second paragraph with more content.</p>
</div>
<footer>Footer text</footer>
</body>
</html>
'''

# Verify the HTML is actually large (should be ~20MB+)
html_size_mb = len(html) / (1024 * 1024)
assert html_size_mb > 15, f"HTML should be >15MB, got {html_size_mb:.2f}MB"

print(f" Testing {html_size_mb:.2f}MB HTML document with bloated head...")

# This should not crash or silently exit
text = html_to_text(html)

# Verify we got actual text output (not empty/None)
assert text is not None, "html_to_text returned None"
assert len(text) > 0, "html_to_text returned empty string"

# Verify the actual body content was extracted
assert 'Important Heading' in text, "Failed to extract heading"
assert 'actual content that should be extracted' in text, "Failed to extract paragraph"
assert 'First paragraph with meaningful text' in text, "Failed to extract first paragraph"
assert 'Second paragraph with more content' in text, "Failed to extract second paragraph"
assert 'Footer text' in text, "Failed to extract footer"

# Verify bloat was stripped (output should be tiny compared to input)
text_size_kb = len(text) / 1024
assert text_size_kb < 1, f"Output too large ({text_size_kb:.2f}KB), bloat not stripped"

# Verify no CSS, script content, or SVG leaked through
assert 'color:red' not in text, "Style content leaked into text output"
assert 'console.log' not in text, "Script content leaked into text output"
assert '<path' not in text, "SVG content leaked into text output"
assert 'bloat.css' not in text, "Link href leaked into text output"

print(f" ✓ Successfully processed {html_size_mb:.2f}MB HTML -> {text_size_kb:.2f}KB text")

def test_body_display_none_spa_pattern(self):
"""
Test that html_to_text can extract content from pages with display:none body.

SPAs (Single Page Applications) often use <body style="display:none"> to hide content
until JavaScript loads and renders the page. inscriptis respects CSS display rules,
so without preprocessing, it would skip all content and return only newlines.

The fix strips display:none and visibility:hidden styles from the body tag before
processing, allowing text extraction from client-side rendered applications.
"""
# Test case 1: Basic display:none
html1 = '''<!DOCTYPE html>
<html lang="en">
<head><title>What's New – Fluxguard</title></head>
<body style="display:none">
<h1>Important Heading</h1>
<p>This is actual content that should be extracted.</p>
<div>
<p>First paragraph with meaningful text.</p>
<p>Second paragraph with more content.</p>
</div>
</body>
</html>'''

text1 = html_to_text(html1)

# Before fix: would return ~33 newlines, len(text) ~= 33
# After fix: should extract actual content, len(text) > 100
assert len(text1) > 100, f"Expected substantial text output, got {len(text1)} chars"
assert 'Important Heading' in text1, "Failed to extract heading from display:none body"
assert 'actual content' in text1, "Failed to extract paragraph from display:none body"
assert 'First paragraph' in text1, "Failed to extract nested content"

# Should not be mostly newlines
newline_ratio = text1.count('\n') / len(text1)
assert newline_ratio < 0.5, f"Output is mostly newlines ({newline_ratio:.2%}), content not extracted"

# Test case 2: visibility:hidden (another hiding pattern)
html2 = '<html><body style="visibility:hidden"><h1>Hidden Content</h1><p>Test paragraph.</p></body></html>'
text2 = html_to_text(html2)

assert 'Hidden Content' in text2, "Failed to extract content from visibility:hidden body"
assert 'Test paragraph' in text2, "Failed to extract paragraph from visibility:hidden body"

# Test case 3: Mixed styles (display:none with other CSS)
html3 = '<html><body style="color: red; display:none; font-size: 12px"><p>Mixed style content</p></body></html>'
text3 = html_to_text(html3)

assert 'Mixed style content' in text3, "Failed to extract content from body with mixed styles"

# Test case 4: Case insensitivity (DISPLAY:NONE uppercase)
html4 = '<html><body style="DISPLAY:NONE"><p>Uppercase style</p></body></html>'
text4 = html_to_text(html4)

assert 'Uppercase style' in text4, "Failed to handle uppercase DISPLAY:NONE"

# Test case 5: Space variations (display: none vs display:none)
html5 = '<html><body style="display: none"><p>With spaces</p></body></html>'
text5 = html_to_text(html5)

assert 'With spaces' in text5, "Failed to handle 'display: none' with space"

# Test case 6: Body with other attributes (class, id)
html6 = '<html><body class="foo" style="display:none" id="bar"><p>With attributes</p></body></html>'
text6 = html_to_text(html6)

assert 'With attributes' in text6, "Failed to extract from body with multiple attributes"

# Test case 7: Should NOT affect opacity:0 (which doesn't hide from inscriptis)
html7 = '<html><body style="opacity:0"><p>Transparent content</p></body></html>'
text7 = html_to_text(html7)

# Opacity doesn't affect inscriptis text extraction, content should be there
assert 'Transparent content' in text7, "Incorrectly stripped opacity:0 style"

print(" ✓ All display:none body tag tests passed")

def test_style_tag_with_svg_data_uri(self):
"""
Test that style tags containing SVG data URIs are properly stripped.

Some WordPress and modern sites embed SVG as data URIs in CSS, which contains
<svg> and </svg> tags within the style content. The regex must use backreferences
to ensure <style> matches </style> (not </svg> inside the CSS).

This was causing errors where the regex would match <style> and stop at the first
</svg> it encountered inside a CSS data URI, breaking the HTML structure.
"""
# Real-world example from WordPress wp-block-image styles
html = '''<!DOCTYPE html>
<html>
<head>
<style id='wp-block-image-inline-css'>
.wp-block-image>a,.wp-block-image>figure>a{display:inline-block}.wp-block-image img{box-sizing:border-box;height:auto;max-width:100%;vertical-align:bottom}@supports ((-webkit-mask-image:none) or (mask-image:none)) or (-webkit-mask-image:none){.wp-block-image.is-style-circle-mask img{border-radius:0;-webkit-mask-image:url('data:image/svg+xml;utf8,<svg viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="50"/></svg>');mask-image:url('data:image/svg+xml;utf8,<svg viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="50"/></svg>');mask-mode:alpha}}
</style>
</head>
<body>
<h1>Test Heading</h1>
<p>This is the actual content that should be extracted.</p>
<div class="wp-block-image">
<img src="test.jpg" alt="Test image">
</div>
</body>
</html>'''

# This should not crash and should extract the body content
text = html_to_text(html)

# Verify the actual body content was extracted
assert text is not None, "html_to_text returned None"
assert len(text) > 0, "html_to_text returned empty string"
assert 'Test Heading' in text, "Failed to extract heading"
assert 'actual content that should be extracted' in text, "Failed to extract paragraph"

# Verify CSS content was stripped (including the SVG data URI)
assert '.wp-block-image' not in text, "CSS class selector leaked into text"
assert 'mask-image' not in text, "CSS property leaked into text"
assert 'data:image/svg+xml' not in text, "SVG data URI leaked into text"
assert 'viewBox' not in text, "SVG attributes leaked into text"

# Verify no broken HTML structure
assert '<style' not in text, "Unclosed style tag in output"
assert '</svg>' not in text, "SVG closing tag leaked into text"

print(" ✓ Style tag with SVG data URI test passed")

def test_style_tag_closes_correctly(self):
"""
Test that each tag type (style, script, svg) closes with the correct closing tag.

Before the fix, the regex used (?:style|script|svg|noscript) for both opening and
closing tags, which meant <style> could incorrectly match </svg> as its closing tag.
With backreferences, <style> must close with </style>, <svg> with </svg>, etc.
"""
# Test nested tags where incorrect matching would break
html = '''<!DOCTYPE html>
<html>
<head>
<style>
body { background: url('data:image/svg+xml,<svg><rect/></svg>'); }
</style>
<script>
const svg = '<svg><path d="M0,0"/></svg>';
</script>
</head>
<body>
<h1>Content</h1>
<svg><circle cx="50" cy="50" r="40"/></svg>
<p>After SVG</p>
</body>
</html>'''

text = html_to_text(html)

# Should extract body content
assert 'Content' in text, "Failed to extract heading"
assert 'After SVG' in text, "Failed to extract content after SVG"

# Should strip all style/script/svg content
assert 'background:' not in text, "Style content leaked"
assert 'const svg' not in text, "Script content leaked"
assert '<circle' not in text, "SVG element leaked"
assert 'data:image/svg+xml' not in text, "Data URI leaked"

print(" ✓ Tag closing validation test passed")


if __name__ == '__main__':
# Can run this file directly for quick testing

@@ -8,6 +8,7 @@ python3 -m pytest changedetectionio/tests/unit/test_time_handler.py -v
"""

import unittest
import unittest.mock
import arrow
from changedetectionio import time_handler

@@ -240,6 +241,211 @@ class TestAmIInsideTime(unittest.TestCase):
# Result depends on current time
self.assertIsInstance(result, bool)

def test_24_hour_schedule_from_midnight(self):
"""Test 24-hour schedule starting at midnight covers entire day."""
timezone_str = 'UTC'
# Test at a specific time: Monday 00:00
test_time = arrow.get('2024-01-01 00:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd') # Monday

# Mock current time for testing
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="00:00",
timezone_str=timezone_str,
duration=1440 # 24 hours
)
self.assertTrue(result, "Should be active at start of 24-hour schedule")

def test_24_hour_schedule_at_end_of_day(self):
"""Test 24-hour schedule is active at 23:59:59."""
timezone_str = 'UTC'
# Test at Monday 23:59:59
test_time = arrow.get('2024-01-01 23:59:59', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd') # Monday

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="00:00",
timezone_str=timezone_str,
duration=1440 # 24 hours
)
self.assertTrue(result, "Should be active at end of 24-hour schedule")

def test_24_hour_schedule_at_midnight_transition(self):
"""Test 24-hour schedule at exactly midnight transition."""
timezone_str = 'UTC'
# Test at Tuesday 00:00:00 (end of Monday's 24-hour schedule)
test_time = arrow.get('2024-01-02 00:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
monday = test_time.shift(days=-1).format('dddd') # Monday

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=monday,
time_str="00:00",
timezone_str=timezone_str,
duration=1440 # 24 hours
)
self.assertTrue(result, "Should include exactly midnight at end of 24-hour schedule")

def test_schedule_crosses_midnight_before_midnight(self):
"""Test schedule crossing midnight - before midnight."""
timezone_str = 'UTC'
# Monday 23:30
test_time = arrow.get('2024-01-01 23:30:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd') # Monday

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="23:00",
timezone_str=timezone_str,
duration=120 # 2 hours (until 01:00 next day)
)
self.assertTrue(result, "Should be active before midnight in cross-midnight schedule")

def test_schedule_crosses_midnight_after_midnight(self):
"""Test schedule crossing midnight - after midnight."""
timezone_str = 'UTC'
# Tuesday 00:30
test_time = arrow.get('2024-01-02 00:30:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
monday = test_time.shift(days=-1).format('dddd') # Monday

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=monday,
time_str="23:00",
timezone_str=timezone_str,
duration=120 # 2 hours (until 01:00 Tuesday)
)
self.assertTrue(result, "Should be active after midnight in cross-midnight schedule")

def test_schedule_crosses_midnight_at_exact_end(self):
"""Test schedule crossing midnight at exact end time."""
timezone_str = 'UTC'
# Tuesday 01:00 (exact end of Monday 23:00 + 120 minutes)
test_time = arrow.get('2024-01-02 01:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
monday = test_time.shift(days=-1).format('dddd') # Monday

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=monday,
time_str="23:00",
timezone_str=timezone_str,
duration=120 # 2 hours
)
self.assertTrue(result, "Should include exact end time of schedule")

def test_duration_60_minutes(self):
"""Test that duration of 60 minutes works correctly."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 12:30:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=60 # Exactly 60 minutes
)
self.assertTrue(result, "60-minute duration should work")

def test_duration_at_exact_end_minute(self):
"""Test at exact end of 60-minute window."""
timezone_str = 'UTC'
# Exactly 13:00 (end of 12:00 + 60 minutes)
test_time = arrow.get('2024-01-01 13:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=60
)
self.assertTrue(result, "Should include exact end minute")

def test_one_second_after_schedule_ends(self):
"""Test one second after schedule should end."""
timezone_str = 'UTC'
# 13:00:01 (one second after 12:00 + 60 minutes)
test_time = arrow.get('2024-01-01 13:00:01', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=60
)
self.assertFalse(result, "Should be False one second after schedule ends")

def test_multi_day_schedule(self):
"""Test schedule longer than 24 hours (48 hours)."""
timezone_str = 'UTC'
# Tuesday 12:00 (36 hours after Monday 00:00)
test_time = arrow.get('2024-01-02 12:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
monday = test_time.shift(days=-1).format('dddd')

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=monday,
time_str="00:00",
timezone_str=timezone_str,
duration=2880 # 48 hours
)
self.assertTrue(result, "Should support multi-day schedules")

def test_schedule_one_minute_duration(self):
"""Test very short 1-minute schedule."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 12:00:30', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=1 # Just 1 minute
)
self.assertTrue(result, "1-minute schedule should work")

def test_schedule_at_exact_start_time(self):
"""Test at exact start time (00:00:00.000000)."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 12:00:00.000000', 'YYYY-MM-DD HH:mm:ss.SSSSSS').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=30
)
self.assertTrue(result, "Should include exact start time")

def test_schedule_one_microsecond_before_start(self):
"""Test one microsecond before schedule starts."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 11:59:59.999999', 'YYYY-MM-DD HH:mm:ss.SSSSSS').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=30
)
self.assertFalse(result, "Should not include time before start")


class TestIsWithinSchedule(unittest.TestCase):
"""Tests for the is_within_schedule function."""
@@ -405,6 +611,175 @@ class TestIsWithinSchedule(unittest.TestCase):
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should handle timezone with whitespace")

def test_schedule_with_60_minutes(self):
"""Test schedule with duration of 0 hours and 60 minutes."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
current_hour = now.format('HH:00')

time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': current_hour,
'duration': {'hours': 0, 'minutes': 60} # 60 minutes
}
}

result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should accept 60 minutes as valid duration")

def test_schedule_with_24_hours(self):
"""Test schedule with duration of 24 hours and 0 minutes."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
start_hour = now.format('HH:00')

time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': start_hour,
'duration': {'hours': 24, 'minutes': 0} # Full 24 hours
}
}

result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should accept 24 hours as valid duration")

def test_schedule_with_90_minutes(self):
"""Test schedule with duration of 0 hours and 90 minutes."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
current_hour = now.format('HH:00')

time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': current_hour,
'duration': {'hours': 0, 'minutes': 90} # 90 minutes = 1.5 hours
}
}

result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should accept 90 minutes as valid duration")

def test_schedule_24_hours_from_midnight(self):
"""Test 24-hour schedule from midnight using is_within_schedule."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 12:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
current_day = test_time.format('dddd').lower() # monday

time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': '00:00',
'duration': {'hours': 24, 'minutes': 0}
}
}

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "24-hour schedule from midnight should cover entire day")

def test_schedule_24_hours_at_end_of_day(self):
"""Test 24-hour schedule at 23:59 using is_within_schedule."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 23:59:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
current_day = test_time.format('dddd').lower()

time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': '00:00',
'duration': {'hours': 24, 'minutes': 0}
}
}

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should be active at 23:59 in 24-hour schedule")

def test_schedule_crosses_midnight_with_is_within_schedule(self):
"""Test schedule crossing midnight using is_within_schedule."""
timezone_str = 'UTC'
# Tuesday 00:30
test_time = arrow.get('2024-01-02 00:30:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
# Get Monday as that's when the schedule started
monday = test_time.shift(days=-1).format('dddd').lower()

time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
'monday': {
'enabled': True,
'start_time': '23:00',
'duration': {'hours': 2, 'minutes': 0} # Until 01:00 Tuesday
},
'tuesday': {
'enabled': False,
'start_time': '09:00',
'duration': {'hours': 8, 'minutes': 0}
}
}

with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.is_within_schedule(time_schedule_limit)
# Note: This checks Tuesday's schedule, not Monday's overlap
# So it should be False because Tuesday is disabled
self.assertFalse(result, "Should check current day (Tuesday), which is disabled")

def test_schedule_with_mixed_hours_minutes(self):
"""Test schedule with both hours and minutes (23 hours 60 minutes = 24 hours)."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
current_hour = now.format('HH:00')

time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': current_hour,
'duration': {'hours': 23, 'minutes': 60} # = 1440 minutes = 24 hours
}
}

result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should handle 23 hours + 60 minutes = 24 hours")

def test_schedule_48_hours(self):
"""Test schedule with 48-hour duration."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
start_hour = now.format('HH:00')

time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': start_hour,
'duration': {'hours': 48, 'minutes': 0} # 2 full days
}
}

result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should support 48-hour (multi-day) schedules")


class TestWeekdayEnum(unittest.TestCase):
"""Tests for the Weekday enum."""

@@ -62,19 +62,19 @@ def am_i_inside_time(
# Calculate start and end times for the overlap from the previous day
start_datetime_tz = start_datetime_tz.shift(days=-1)
end_datetime_tz = start_datetime_tz.shift(minutes=duration)
if start_datetime_tz <= now_tz < end_datetime_tz:
if start_datetime_tz <= now_tz <= end_datetime_tz:
return True

# Handle current day's range
if target_weekday == current_weekday:
end_datetime_tz = start_datetime_tz.shift(minutes=duration)
if start_datetime_tz <= now_tz < end_datetime_tz:
if start_datetime_tz <= now_tz <= end_datetime_tz:
return True

# Handle next day's overlap
if target_weekday == (current_weekday + 1) % 7:
end_datetime_tz = start_datetime_tz.shift(minutes=duration)
if now_tz < start_datetime_tz and now_tz.shift(days=1) < end_datetime_tz:
if now_tz < start_datetime_tz and now_tz.shift(days=1) <= end_datetime_tz:
return True

return False

@@ -4,11 +4,10 @@ import changedetectionio.content_fetchers.exceptions as content_fetchers_excepti
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools
from changedetectionio import worker_pool
from changedetectionio.flask_app import watch_check_update
from changedetectionio.queuedWatchMetaData import PrioritizedItem
from changedetectionio.pluggy_interface import apply_update_handler_alter, apply_update_finalize

import asyncio
import importlib
import os
import sys
import time
@@ -56,6 +55,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
while not app.config.exit.is_set():
update_handler = None
watch = None
processing_exception = None # Reset at start of each iteration to prevent state bleeding

try:
# Efficient blocking via run_in_executor (no polling overhead!)
@@ -119,7 +119,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
# to prevent race condition with wait_for_all_checks()

fetch_start_time = round(time.time())

try:
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
changed_detected = False
@@ -136,6 +136,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")

try:
# Retrieve signal by name to ensure thread-safe access across worker threads
watch_check_update = signal('watch_check_update')
watch_check_update.send(watch_uuid=uuid)

# Processor is what we are using for detecting the "Change"
@@ -154,6 +156,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid)

# Allow plugins to modify/wrap the update_handler
update_handler = apply_update_handler_alter(update_handler, watch, datastore)

update_signal = signal('watch_small_status_comment')
update_signal.send(watch_uuid=uuid, status="Fetching page..")

@@ -473,8 +478,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.exception(f"Worker {worker_id} full exception details:")
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")


# Store favicon if necessary
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
@@ -498,6 +501,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
gc.collect()

except Exception as e:
# Store the processing exception for plugin finalization hook
processing_exception = e

logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
logger.exception(f"Worker {worker_id} full exception details:")
@@ -509,6 +514,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
finally:
# Always cleanup - this runs whether there was an exception or not
if uuid:
# Capture references for plugin finalize hook BEFORE cleanup
# (cleanup may delete these variables, but plugins need the original references)
finalize_handler = update_handler # Capture now, before cleanup deletes it
finalize_watch = watch # Capture now, before any modifications

# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
try:
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
@@ -518,12 +528,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.exception(f"Worker {worker_id} full exception details:")

try:
# Release UUID from processing (thread-safe)
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)

# Send completion signal
if watch:
watch_check_update.send(watch_uuid=watch['uuid'])

# Clean up all memory references BEFORE garbage collection
if update_handler:
@@ -547,7 +551,37 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
logger.exception(f"Worker {worker_id} full exception details:")

del(uuid)
# Call plugin finalization hook after all cleanup is done
# Use captured references from before cleanup
try:
apply_update_finalize(
update_handler=finalize_handler,
watch=finalize_watch,
datastore=datastore,
processing_exception=processing_exception
)
except Exception as finalize_error:
logger.error(f"Worker {worker_id} error in finalize hook: {finalize_error}")
logger.exception(f"Worker {worker_id} full exception details:")
finally:
# Clean up captured references to allow immediate garbage collection
del finalize_handler
del finalize_watch

# Release UUID from processing AFTER all cleanup and hooks complete (thread-safe)
# This ensures wait_for_all_checks() waits for finalize hooks to complete
try:
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
except Exception as release_error:
logger.error(f"Worker {worker_id} error releasing UUID: {release_error}")
logger.exception(f"Worker {worker_id} full exception details:")
finally:
# Send completion signal - retrieve by name to ensure thread-safe access
if watch:
watch_check_update = signal('watch_check_update')
watch_check_update.send(watch_uuid=watch['uuid'])

del (uuid)

# Brief pause before continuing to avoid tight error loops (only on error)
if 'e' in locals():