Also add a test URL watch to the upgrade test

Use detached HEAD commit SHA instead
extra test
2026-05-01 23:30:33 +00:00 · 2026-02-11 16:03:23 +01:00 · 2026-02-11 15:56:56 +01:00 · 2026-02-11 15:53:01 +01:00 · 2026-02-11 15:47:03 +01:00 · 2026-02-11 15:37:25 +01:00
43 changed files with 2746 additions and 1096 deletions
@@ -103,7 +103,7 @@ jobs:
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
-          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
+          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'         

  # Basic pytest tests with ancillary services
  basic-tests:
@@ -516,3 +516,142 @@ jobs:
            exit 1
          fi
          docker rm sig-test
+
+  # Upgrade path test
+  upgrade-path-test:
+    runs-on: ubuntu-latest
+    needs: build
+    timeout-minutes: 25
+    env:
+      PYTHON_VERSION: ${{ inputs.python-version }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # Fetch all history and tags for upgrade testing
+
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Check upgrade works without error
+        run: |
+          echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
+
+          # Checkout old version and create datastore
+          git checkout 0.49.1
+          python3 -m venv .venv
+          source .venv/bin/activate
+          pip install -r requirements.txt
+          pip install 'pyOpenSSL>=23.2.0'
+
+          echo "=== Running version 0.49.1 to create datastore ==="
+          python3 ./changedetection.py -C -d /tmp/data &
+          APP_PID=$!
+
+          # Wait for app to be ready
+          echo "Waiting for 0.49.1 to be ready..."
+          sleep 6
+
+          # Extract API key from datastore (0.49.1 uses url-watches.json)
+          API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json)
+          echo "API Key: ${API_KEY:0:8}..."
+
+          # Create a watch with tag "github-group-test" via API
+          echo "Creating test watch with tag via API..."
+          curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
+            -H "x-api-key: ${API_KEY}" \
+            -H "Content-Type: application/json" \
+            --show-error --fail \
+            --retry 6 --retry-delay 1 --retry-connrefused \
+            -d '{
+              "url": "https://example.com/upgrade-test",
+              "tag": "github-group-test"
+            }'
+
+          echo "✓ Created watch with tag 'github-group-test'"
+
+          # Create a specific test URL watch
+          echo "Creating test URL watch via API..."
+          curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
+            -H "x-api-key: ${API_KEY}" \
+            -H "Content-Type: application/json" \
+            --show-error --fail \
+            -d '{
+              "url": "http://localhost/test.txt"
+            }'
+
+          echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1"
+
+          # Stop the old version gracefully
+          kill $APP_PID
+          wait $APP_PID || true
+          echo "✓ Version 0.49.1 stopped"
+
+          # Upgrade to current version (use commit SHA since we're in detached HEAD)
+          echo "Upgrading to commit ${{ github.sha }}"
+          git checkout ${{ github.sha }}
+          pip install -r requirements.txt
+
+          echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
+          TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
+
+          echo "=== Upgrade test output ==="
+          cat /tmp/upgrade-test.log
+          echo "✓ Datastore upgraded successfully"
+
+          # Now start the current version normally to verify the tag survived
+          echo "=== Starting current version to verify tag exists after upgrade ==="
+          timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
+          APP_PID=$!
+
+          # Wait for app to be ready and fetch UI
+          echo "Waiting for current version to be ready..."
+          sleep 5
+          curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html
+
+          # Verify tag exists in UI
+          if grep -q "github-group-test" /tmp/ui-output.html; then
+            echo "✓ Tag 'github-group-test' found in UI after upgrade"
+          else
+            echo "ERROR: Tag 'github-group-test' not found in UI after upgrade"
+            echo "=== UI Output ==="
+            cat /tmp/ui-output.html
+            echo "=== App Log ==="
+            cat /tmp/ui-test.log
+            kill $APP_PID || true
+            exit 1
+          fi
+
+          # Verify test URL exists in UI
+          if grep -q "http://localhost/test.txt" /tmp/ui-output.html; then
+            echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade"
+          else
+            echo "ERROR: Watch URL 'http://localhost/test.txt' not found in UI after upgrade"
+            echo "=== UI Output ==="
+            cat /tmp/ui-output.html
+            echo "=== App Log ==="
+            cat /tmp/ui-test.log
+            kill $APP_PID || true
+            exit 1
+          fi
+
+          # Cleanup
+          kill $APP_PID || true
+          wait $APP_PID || true
+
+          echo ""
+          echo "✓✓✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }} ✓✓✓"
+          echo "    - Commit: ${{ github.sha }}"
+          echo "    - Datastore migrated successfully"
+          echo "    - Tag 'github-group-test' survived upgrade"
+          echo "    - Watch URL 'http://localhost/test.txt' survived upgrade"
+
+          echo "✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }}"
+
+      - name: Upload upgrade test logs
+        if: always()
+        uses: actions/upload-artifact@v6
+        with:
+          name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
+          path: /tmp/upgrade-test.log
@@ -112,9 +112,9 @@ def sigshutdown_handler(_signo, _stack_frame):
        from changedetectionio.flask_app import update_q, notification_q
        update_q.close()
        notification_q.close()
-        logger.debug("Janus queues closed successfully")
+        logger.debug("Queues closed successfully")
    except Exception as e:
-        logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
+        logger.critical(f"CRITICAL: Failed to close queues: {e}")
    
    # Shutdown socketio server fast
    from changedetectionio.flask_app import socketio_server
@@ -124,13 +124,9 @@ def sigshutdown_handler(_signo, _stack_frame):
        except Exception as e:
            logger.error(f"Error shutting down Socket.IO server: {str(e)}")
    
-    # Save data quickly - force immediate save using abstract method
-    try:
-        datastore.force_save_all()
-        logger.success('Fast sync to storage complete.')
-    except Exception as e:
-        logger.error(f"Error syncing to storage: {str(e)}")
-    
+    # With immediate persistence, all data is already saved
+    logger.success('All data already persisted (immediate commits enabled).')
+
    sys.exit()

 def print_help():
@@ -186,7 +182,6 @@ def main():
    from changedetectionio.flask_app import changedetection_app

    datastore_path = None
-    do_cleanup = False
    # Set a default logger level
    logger_level = 'DEBUG'
    include_default_watches = True
@@ -269,7 +264,7 @@ def main():
        i += 1

    try:
-        opts, args = getopt.getopt(cleaned_argv[1:], "6Ccsd:h:p:l:P:", "port")
+        opts, args = getopt.getopt(cleaned_argv[1:], "6Csd:h:p:l:P:", "port")
    except getopt.GetoptError as e:
        print_help()
        print(f'Error: {e}')
@@ -297,10 +292,6 @@ def main():
        if opt == '-d':
            datastore_path = arg

-        # Cleanup (remove text files that arent in the index)
-        if opt == '-c':
-            do_cleanup = True
-
        # Create the datadir if it doesnt exist
        if opt == '-C':
            create_datastore_dir = True
@@ -380,7 +371,15 @@ def main():
        # Dont' start if the JSON DB looks corrupt
        logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
        logger.critical(str(e))
-        return
+        sys.exit(1)
+
+    # Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests)
+    if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'):
+        logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}")
+        logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}")
+        logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches")
+        logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)")
+        sys.exit(0)

    # Apply all_paused setting if specified via CLI
    if all_paused is not None:
@@ -606,10 +605,6 @@ def main():
    else:
        logger.info("SIGUSR1 handler only registered on Linux, skipped.")

-    # Go into cleanup mode
-    if do_cleanup:
-        datastore.remove_unused_snapshots()
-
    app.config['datastore_path'] = datastore_path


@@ -618,7 +613,7 @@ def main():
        return dict(right_sticky="v{}".format(datastore.data['version_tag']),
                    new_version_available=app.config['NEW_VERSION_AVAILABLE'],
                    has_password=datastore.data['settings']['application']['password'] != False,
-                    socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
+                    socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
                    all_paused=datastore.data['settings']['application'].get('all_paused', False),
                    all_muted=datastore.data['settings']['application'].get('all_muted', False)
                    )
@@ -2,8 +2,12 @@ from changedetectionio.strtobool import strtobool
 from flask_restful import abort, Resource
 from flask import request
 from functools import wraps
-from . import auth, validate_openapi_request
+from . import auth, validate_openapi_request, schema_create_watch
 from ..validate_url import is_safe_valid_url
+import json
+
+# Number of URLs above which import switches to background processing
+IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD = 20


 def default_content_type(content_type='text/plain'):
@@ -19,6 +23,62 @@ def default_content_type(content_type='text/plain'):
    return decorator


+def convert_query_param_to_type(value, schema_property):
+    """
+    Convert a query parameter string to the appropriate type based on schema definition.
+
+    Args:
+        value: String value from query parameter
+        schema_property: Schema property definition with 'type' or 'anyOf' field
+
+    Returns:
+        Converted value in the appropriate type
+    """
+    # Handle anyOf schemas (extract the first type)
+    if 'anyOf' in schema_property:
+        # Use the first non-null type from anyOf
+        for option in schema_property['anyOf']:
+            if option.get('type') and option.get('type') != 'null':
+                prop_type = option.get('type')
+                break
+        else:
+            prop_type = None
+    else:
+        prop_type = schema_property.get('type')
+
+    # Handle array type (e.g., notification_urls)
+    if prop_type == 'array':
+        # Support both comma-separated and JSON array format
+        if value.startswith('['):
+            try:
+                return json.loads(value)
+            except json.JSONDecodeError:
+                return [v.strip() for v in value.split(',')]
+        return [v.strip() for v in value.split(',')]
+
+    # Handle object type (e.g., time_between_check, headers)
+    elif prop_type == 'object':
+        try:
+            return json.loads(value)
+        except json.JSONDecodeError:
+            raise ValueError(f"Invalid JSON object for field: {value}")
+
+    # Handle boolean type
+    elif prop_type == 'boolean':
+        return strtobool(value)
+
+    # Handle integer type
+    elif prop_type == 'integer':
+        return int(value)
+
+    # Handle number type (float)
+    elif prop_type == 'number':
+        return float(value)
+
+    # Default: return as string
+    return value
+
+
 class Import(Resource):
    def __init__(self, **kwargs):
        # datastore is a black box dependency
@@ -28,40 +88,127 @@ class Import(Resource):
    @default_content_type('text/plain') #3547 #3542
    @validate_openapi_request('importWatches')
    def post(self):
-        """Import a list of watched URLs."""
+        """Import a list of watched URLs with optional watch configuration."""
+
+        # Special parameters that are NOT watch configuration
+        special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}

        extras = {}

+        # Handle special 'proxy' parameter
        if request.args.get('proxy'):
            plist = self.datastore.proxy_list
            if not request.args.get('proxy') in plist:
-                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
+                proxy_list_str = ', '.join(plist) if plist else 'none configured'
+                return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
            else:
                extras['proxy'] = request.args.get('proxy')

+        # Handle special 'dedupe' parameter
        dedupe = strtobool(request.args.get('dedupe', 'true'))

+        # Handle special 'tag' and 'tag_uuids' parameters
        tags = request.args.get('tag')
        tag_uuids = request.args.get('tag_uuids')

        if tag_uuids:
            tag_uuids = tag_uuids.split(',')

+        # Extract ALL other query parameters as watch configuration
+        schema_properties = schema_create_watch.get('properties', {})
+        for param_name, param_value in request.args.items():
+            # Skip special parameters
+            if param_name in special_params:
+                continue
+
+            # Reject parameters that are not in the schema (unknown parameter)
+            if param_name not in schema_properties:
+                return f"Unknown watch configuration parameter: {param_name}", 400
+
+            # Convert to appropriate type based on schema
+            try:
+                converted_value = convert_query_param_to_type(param_value, schema_properties[param_name])
+                extras[param_name] = converted_value
+            except (ValueError, json.JSONDecodeError) as e:
+                return f"Invalid value for parameter '{param_name}': {str(e)}", 400
+
+        # Validate processor if provided
+        if 'processor' in extras:
+            from changedetectionio.processors import available_processors
+            available = [p[0] for p in available_processors()]
+            if extras['processor'] not in available:
+                return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
+
+        # Validate fetch_backend if provided
+        if 'fetch_backend' in extras:
+            from changedetectionio.content_fetchers import available_fetchers
+            available = [f[0] for f in available_fetchers()]
+            # Also allow 'system' and extra_browser_* patterns
+            is_valid = (
+                extras['fetch_backend'] == 'system' or
+                extras['fetch_backend'] in available or
+                extras['fetch_backend'].startswith('extra_browser_')
+            )
+            if not is_valid:
+                return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
+
+        # Validate notification_urls if provided
+        if 'notification_urls' in extras:
+            from wtforms import ValidationError
+            from changedetectionio.api.Notifications import validate_notification_urls
+            try:
+                validate_notification_urls(extras['notification_urls'])
+            except ValidationError as e:
+                return f"Invalid notification_urls: {str(e)}", 400
+
        urls = request.get_data().decode('utf8').splitlines()
-        added = []
+        # Clean and validate URLs upfront
+        urls_to_import = []
        for url in urls:
            url = url.strip()
            if not len(url):
                continue

-            # If hosts that only contain alphanumerics are allowed ("localhost" for example)
+            # Validate URL
            if not is_safe_valid_url(url):
                return f"Invalid or unsupported URL - {url}", 400

+            # Check for duplicates if dedupe is enabled
            if dedupe and self.datastore.url_exists(url):
                continue

-            new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
-            added.append(new_uuid)
+            urls_to_import.append(url)

-        return added
+        # For small imports, process synchronously for immediate feedback
+        if len(urls_to_import) < IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD:
+            added = []
+            for url in urls_to_import:
+                new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
+                added.append(new_uuid)
+            return added, 200
+
+        # For large imports (>= 20), process in background thread
+        else:
+            import threading
+            from loguru import logger
+
+            def import_watches_background():
+                """Background thread to import watches - discarded after completion."""
+                try:
+                    added_count = 0
+                    for url in urls_to_import:
+                        try:
+                            self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
+                            added_count += 1
+                        except Exception as e:
+                            logger.error(f"Error importing URL {url}: {e}")
+
+                    logger.info(f"Background import complete: {added_count} watches created")
+                except Exception as e:
+                    logger.error(f"Error in background import: {e}")
+
+            # Start background thread and return immediately
+            thread = threading.Thread(target=import_watches_background, daemon=True, name="ImportWatches-Background")
+            thread.start()
+
+            return {'status': f'Importing {len(urls_to_import)} URLs in background', 'count': len(urls_to_import)}, 202
@@ -67,7 +67,7 @@ class Notifications(Resource):

        clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
        self.datastore.data['settings']['application']['notification_urls'] = clean_urls
-        self.datastore.needs_write = True
+        self.datastore.commit()

        return {'notification_urls': clean_urls}, 200
        
@@ -95,7 +95,7 @@ class Notifications(Resource):
            abort(400, message="No matching notification URLs found.")

        self.datastore.data['settings']['application']['notification_urls'] = notification_urls
-        self.datastore.needs_write = True
+        self.datastore.commit()

        return 'OK', 204
    
@@ -24,8 +24,7 @@ class Tag(Resource):
    @validate_openapi_request('getTag')
    def get(self, uuid):
        """Get data for a single tag/group, toggle notification muting, or recheck all."""
-        from copy import deepcopy
-        tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
+        tag = self.datastore.data['settings']['application']['tags'].get(uuid)
        if not tag:
            abort(404, message=f'No tag exists with the UUID of {uuid}')

@@ -62,10 +61,12 @@ class Tag(Resource):
                return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202

        if request.args.get('muted', '') == 'muted':
-            self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
+            tag['notification_muted'] = True
+            tag.commit()
            return "OK", 200
        elif request.args.get('muted', '') == 'unmuted':
-            self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
+            tag['notification_muted'] = False
+            tag.commit()
            return "OK", 200

        return tag
@@ -79,11 +80,23 @@ class Tag(Resource):

        # Delete the tag, and any tag reference
        del self.datastore.data['settings']['application']['tags'][uuid]
-        
+
+        # Delete tag.json file if it exists
+        import os
+        tag_dir = os.path.join(self.datastore.datastore_path, uuid)
+        tag_json = os.path.join(tag_dir, "tag.json")
+        if os.path.exists(tag_json):
+            try:
+                os.unlink(tag_json)
+                logger.info(f"Deleted tag.json for tag {uuid}")
+            except Exception as e:
+                logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
+
        # Remove tag from all watches
        for watch_uuid, watch in self.datastore.data['watching'].items():
            if watch.get('tags') and uuid in watch['tags']:
                watch['tags'].remove(uuid)
+                watch.commit()

        return 'OK', 204

@@ -107,7 +120,7 @@ class Tag(Resource):
                return str(e), 400

        tag.update(request.json)
-        self.datastore.needs_write_urgent = True
+        tag.commit()

        return "OK", 200

@@ -84,15 +84,19 @@ class Watch(Resource):
            return "OK", 200
        if request.args.get('paused', '') == 'paused':
            watch_obj.pause()
+            watch_obj.commit()
            return "OK", 200
        elif request.args.get('paused', '') == 'unpaused':
            watch_obj.unpause()
+            watch_obj.commit()
            return "OK", 200
        if request.args.get('muted', '') == 'muted':
            watch_obj.mute()
+            watch_obj.commit()
            return "OK", 200
        elif request.args.get('muted', '') == 'unmuted':
            watch_obj.unmute()
+            watch_obj.commit()
            return "OK", 200

        # Return without history, get that via another API call
@@ -173,6 +177,7 @@ class Watch(Resource):

        # Update watch with regular (non-processor-config) fields
        watch.update(json_data)
+        watch.commit()

        # Save processor config to JSON file
        processors.save_processor_config(self.datastore, uuid, processor_config_data)
@@ -369,10 +374,10 @@ class WatchFavicon(Resource):
        favicon_filename = watch.get_favicon_filename()
        if favicon_filename:
            # Use cached MIME type detection
-            filepath = os.path.join(watch.watch_data_dir, favicon_filename)
+            filepath = os.path.join(watch.data_dir, favicon_filename)
            mime = get_favicon_mime_type(filepath)

-            response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
+            response = make_response(send_from_directory(watch.data_dir, favicon_filename))
            response.headers['Content-type'] = mime
            response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate
            return response
@@ -419,8 +424,14 @@ class CreateWatch(Resource):
            except ValidationError as e:
                return str(e), 400

+        # Handle processor-config-* fields separately (save to JSON, not watch)
+        from changedetectionio import processors
+
        extras = copy.deepcopy(json_data)

+        # Extract and remove processor config fields from extras
+        processor_config_data = processors.extract_processor_config_from_form_data(extras)
+
        # Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
        tags = None
        if extras.get('tag'):
@@ -430,6 +441,10 @@ class CreateWatch(Resource):
        del extras['url']

        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
+
+        # Save processor config to separate JSON file
+        if new_uuid and processor_config_data:
+            processors.save_processor_config(self.datastore, new_uuid, processor_config_data)
        if new_uuid:
 # Dont queue because the scheduler will check that it hasnt been checked before anyway
 #            worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
@@ -466,6 +481,7 @@ class CreateWatch(Resource):
                'last_error': watch['last_error'],
                'link': watch.link,
                'page_title': watch['page_title'],
+                'tags': [*tags],  # Unpack dict keys to list (can't use list() since variable named 'list')
                'title': watch['title'],
                'url': watch['url'],
                'viewed': watch.viewed
@@ -12,9 +12,17 @@ schema = api_schema.build_watch_json_schema(watch_base_config)
 schema_create_watch = copy.deepcopy(schema)
 schema_create_watch['required'] = ['url']
 del schema_create_watch['properties']['last_viewed']
+# Allow processor_config_* fields (handled separately in endpoint)
+schema_create_watch['patternProperties'] = {
+    '^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
+}

 schema_update_watch = copy.deepcopy(schema)
 schema_update_watch['additionalProperties'] = False
+# Allow processor_config_* fields (handled separately in endpoint)
+schema_update_watch['patternProperties'] = {
+    '^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
+}

 # Tag schema is also based on watch_base since Tag inherits from it
 schema_tag = copy.deepcopy(schema)
@@ -47,7 +47,7 @@ def create_backup(datastore_path, watches: dict):

        # Add any data in the watch data directory.
        for uuid, w in watches.items():
-            for f in Path(w.watch_data_dir).glob('*'):
+            for f in Path(w.data_dir).glob('*'):
                zipObj.write(f,
                             # Use the full path to access the file, but make the file 'relative' in the Zip.
                             arcname=os.path.join(f.parts[-2], f.parts[-1]),
@@ -102,8 +102,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            flash(gettext("Maximum number of backups reached, please remove some"), "error")
            return redirect(url_for('backups.index'))

-        # Be sure we're written fresh - force immediate save using abstract method
-        datastore.force_save_all()
+        # With immediate persistence, all data is already saved
        zip_thread = threading.Thread(
            target=create_backup,
            args=(datastore.datastore_path, datastore.data.get("watching")),
@@ -285,8 +285,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        watch = datastore.data['watching'].get(uuid)
        filename = f"step_before-{step_n}.jpeg" if request.args.get('type', '') == 'before' else f"step_{step_n}.jpeg"

-        if step_n and watch and os.path.isfile(os.path.join(watch.watch_data_dir, filename)):
-            response = make_response(send_from_directory(directory=watch.watch_data_dir, path=filename))
+        if step_n and watch and os.path.isfile(os.path.join(watch.data_dir, filename)):
+            response = make_response(send_from_directory(directory=watch.data_dir, path=filename))
            response.headers['Content-type'] = 'image/jpeg'
            response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
            response.headers['Pragma'] = 'no-cache'
@@ -20,6 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        datastore.data['watching'][uuid]['processor'] = 'restock_diff'
        datastore.data['watching'][uuid].clear_watch()
+        datastore.data['watching'][uuid].commit()
        worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        return redirect(url_for("watchlist.index"))

@@ -27,6 +28,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
    @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
    def reject(uuid):
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
+        datastore.data['watching'][uuid].commit()
        return redirect(url_for("watchlist.index"))


@@ -1,8 +1,9 @@
 import os
 from copy import deepcopy
-from datetime import datetime
+from datetime import datetime, timedelta
 from zoneinfo import ZoneInfo, available_timezones
 import secrets
+import time
 import flask_login
 from flask import Blueprint, render_template, request, redirect, url_for, flash
 from flask_babel import gettext
@@ -74,12 +75,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                    del (app_update['password'])

                datastore.data['settings']['application'].update(app_update)
-                
+
                # Handle dynamic worker count adjustment
                old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
                new_worker_count = form.data['requests'].get('workers', 1)

                datastore.data['settings']['requests'].update(form.data['requests'])
+                datastore.commit()

                # Adjust worker count if it changed
                if new_worker_count != old_worker_count:
@@ -109,13 +111,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):

                if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
                    datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
-                    datastore.needs_write_urgent = True
+                    datastore.commit()
                    flash(gettext("Password protection enabled."), 'notice')
                    flask_login.logout_user()
                    return redirect(url_for('watchlist.index'))

-                datastore.needs_write_urgent = True
-
                # Also save plugin settings from the same form submission
                plugin_tabs_list = get_plugin_settings_tabs()
                for tab in plugin_tabs_list:
@@ -143,6 +143,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        active_plugins = get_active_plugins()
        python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"

+        # Calculate uptime in seconds
+        uptime_seconds = time.time() - datastore.start_time
+
        # Get plugin settings tabs and instantiate forms
        plugin_tabs = get_plugin_settings_tabs()
        plugin_forms = {}
@@ -161,6 +164,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                active_plugins=active_plugins,
                                api_key=datastore.data['settings']['application'].get('api_access_token'),
                                python_version=python_version,
+                                uptime_seconds=uptime_seconds,
                                available_timezones=sorted(available_timezones()),
                                emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
                                extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
@@ -181,7 +185,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def settings_reset_api_key():
        secret = secrets.token_hex(16)
        datastore.data['settings']['application']['api_access_token'] = secret
-        datastore.needs_write_urgent = True
+        datastore.commit()
        flash(gettext("API Key was regenerated."))
        return redirect(url_for('settings.settings_page')+'#api')
        
@@ -198,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def toggle_all_paused():
        current_state = datastore.data['settings']['application'].get('all_paused', False)
        datastore.data['settings']['application']['all_paused'] = not current_state
-        datastore.needs_write_urgent = True
+        datastore.commit()

        if datastore.data['settings']['application']['all_paused']:
            flash(gettext("Automatic scheduling paused - checks will not be queued."), 'notice')
@@ -212,7 +216,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def toggle_all_muted():
        current_state = datastore.data['settings']['application'].get('all_muted', False)
        datastore.data['settings']['application']['all_muted'] = not current_state
-        datastore.needs_write_urgent = True
+        datastore.commit()

        if datastore.data['settings']['application']['all_muted']:
            flash(gettext("All notifications muted."), 'notice')
@@ -394,6 +394,7 @@ nav
                {% endfor %}
            {% endif %}
            <div class="tab-pane-inner" id="info">
+                <p><strong>{{ _('Uptime:') }}</strong> {{ uptime_seconds|format_duration }}</p>
                <p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
                <p><strong>{{ _('Plugins active:') }}</strong></p>
                {% if active_plugins %}
@@ -57,8 +57,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    @tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def mute(uuid):
-        if datastore.data['settings']['application']['tags'].get(uuid):
-            datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted']
+        tag = datastore.data['settings']['application']['tags'].get(uuid)  # falsy when no tag is stored under this uuid
+        if tag:
+            tag['notification_muted'] = not tag['notification_muted']  # flip the tag's current mute flag
+            tag.commit()  # NOTE(review): presumably writes this tag to disk straight away - confirm against the persistence mixin
        return redirect(url_for('tags.tags_overview_page'))

    @tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
@@ -68,6 +70,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        if datastore.data['settings']['application']['tags'].get(uuid):
            del datastore.data['settings']['application']['tags'][uuid]

+        # Delete tag.json file if it exists
+        import os
+        tag_dir = os.path.join(datastore.datastore_path, uuid)
+        tag_json = os.path.join(tag_dir, "tag.json")
+        if os.path.exists(tag_json):
+            try:
+                os.unlink(tag_json)
+                logger.info(f"Deleted tag.json for tag {uuid}")
+            except Exception as e:
+                logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
+
        # Remove tag from all watches in background thread to avoid blocking
        def remove_tag_background(tag_uuid):
            """Background thread to remove tag from watches - discarded after completion."""
@@ -76,6 +89,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                for watch_uuid, watch in datastore.data['watching'].items():
                    if watch.get('tags') and tag_uuid in watch['tags']:
                        watch['tags'].remove(tag_uuid)
+                        watch.commit()
                        removed_count += 1
                logger.info(f"Background: Tag {tag_uuid} removed from {removed_count} watches")
            except Exception as e:
@@ -98,6 +112,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                for watch_uuid, watch in datastore.data['watching'].items():
                    if watch.get('tags') and tag_uuid in watch['tags']:
                        watch['tags'].remove(tag_uuid)
+                        watch.commit()
                        unlinked_count += 1
                logger.info(f"Background: Tag {tag_uuid} unlinked from {unlinked_count} watches")
            except Exception as e:
@@ -112,6 +127,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    @tags_blueprint.route("/delete_all", methods=['GET'])
    @login_optionally_required
    def delete_all():
+        # Delete all tag.json files
+        import os
+        for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
+            tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
+            tag_json = os.path.join(tag_dir, "tag.json")
+            if os.path.exists(tag_json):
+                try:
+                    os.unlink(tag_json)
+                except Exception as e:
+                    logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")
+
        # Clear all tags from settings immediately
        datastore.data['settings']['application']['tags'] = {}

@@ -122,6 +148,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            try:
                for watch_uuid, watch in datastore.data['watching'].items():
                    watch['tags'] = []
+                    watch.commit()
                    cleared_count += 1
                logger.info(f"Background: Cleared tags from {cleared_count} watches")
            except Exception as e:
@@ -202,10 +229,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        if uuid == 'first':
            uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()

-        default = datastore.data['settings']['application']['tags'].get(uuid)
+        tag = datastore.data['settings']['application']['tags'].get(uuid)

        form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None,
-                               data=default,
+                               data=tag,
                               extra_notification_tokens=datastore.get_unique_notification_tokens_available()
                               )
        # @todo subclass form so validation works
@@ -214,9 +241,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 #                flash(','.join(l), 'error')
 #           return redirect(url_for('tags.form_tag_edit_submit', uuid=uuid))

-        datastore.data['settings']['application']['tags'][uuid].update(form.data)
-        datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
-        datastore.needs_write_urgent = True
+        tag.update(form.data)
+        tag['processor'] = 'restock_diff'
+        tag.commit()
        flash(gettext("Updated"))

        return redirect(url_for('tags.tags_overview_page'))
@@ -24,7 +24,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
        for uuid in uuids:
            if datastore.data['watching'].get(uuid):
                datastore.data['watching'][uuid]['paused'] = True
-                datastore.mark_watch_dirty(uuid)
+                datastore.data['watching'][uuid].commit()
        if emit_flash:
            flash(gettext("{} watches paused").format(len(uuids)))

@@ -32,7 +32,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
        for uuid in uuids:
            if datastore.data['watching'].get(uuid):
                datastore.data['watching'][uuid.strip()]['paused'] = False
-                datastore.mark_watch_dirty(uuid)
+                datastore.data['watching'][uuid].commit()
        if emit_flash:
            flash(gettext("{} watches unpaused").format(len(uuids)))

@@ -47,7 +47,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
        for uuid in uuids:
            if datastore.data['watching'].get(uuid):
                datastore.data['watching'][uuid]['notification_muted'] = True
-                datastore.mark_watch_dirty(uuid)
+                datastore.data['watching'][uuid].commit()
        if emit_flash:
            flash(gettext("{} watches muted").format(len(uuids)))

@@ -55,7 +55,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
        for uuid in uuids:
            if datastore.data['watching'].get(uuid):
                datastore.data['watching'][uuid]['notification_muted'] = False
-                datastore.mark_watch_dirty(uuid)
+                datastore.data['watching'][uuid].commit()
        if emit_flash:
            flash(gettext("{} watches un-muted").format(len(uuids)))

@@ -71,7 +71,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
        for uuid in uuids:
            if datastore.data['watching'].get(uuid):
                datastore.data['watching'][uuid]["last_error"] = False
-                datastore.mark_watch_dirty(uuid)
+                datastore.data['watching'][uuid].commit()
        if emit_flash:
            flash(gettext("{} watches errors cleared").format(len(uuids)))

@@ -92,6 +92,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
                datastore.data['watching'][uuid]['notification_body'] = None
                datastore.data['watching'][uuid]['notification_urls'] = []
                datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
+                datastore.data['watching'][uuid].commit()
        if emit_flash:
            flash(gettext("{} watches set to use default notification settings").format(len(uuids)))

@@ -107,6 +108,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
                            datastore.data['watching'][uuid]['tags'] = []

                        datastore.data['watching'][uuid]['tags'].append(tag_uuid)
+                        datastore.data['watching'][uuid].commit()
        if emit_flash:
            flash(gettext("{} watches were tagged").format(len(uuids)))

@@ -198,6 +198,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            # Recast it if need be to right data Watch handler
            watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
            datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore.data, default=datastore.data['watching'][uuid])
+
+            # Save the watch immediately
+            datastore.data['watching'][uuid].commit()
+
            flash(gettext("Updated watch - unpaused!") if request.args.get('unpause_on_save') else gettext("Updated watch."))

            # Cleanup any browsersteps session for this watch
@@ -207,10 +211,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            except Exception as e:
                logger.debug(f"Error cleaning up browsersteps session: {e}")

-            # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
-            # But in the case something is added we should save straight away
-            datastore.needs_write_urgent = True
-
            # Do not queue on edit if its not within the time range

            # @todo maybe it should never queue anyway on edit...
@@ -337,9 +337,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
        if uuid == 'first':
            uuid = list(datastore.data['watching'].keys()).pop()
        watch = datastore.data['watching'].get(uuid)
-        if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
+        if watch and watch.history.keys() and os.path.isdir(watch.data_dir):
            latest_filename = list(watch.history.keys())[-1]
-            html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
+            html_fname = os.path.join(watch.data_dir, f"{latest_filename}.html.br")
            with open(html_fname, 'rb') as f:
                if html_fname.endswith('.br'):
                    # Read and decompress the Brotli file
@@ -386,6 +386,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                    s = re.sub(r'[0-9]+', r'\\d+', s)
                    datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')

+            # Save the updated ignore_text
+            datastore.data["watching"][uuid].commit()
+
        return f"<a href={url_for('ui.ui_preview.preview_page', uuid=uuid)}>Click to preview</a>"
    
    return edit_blueprint
@@ -39,7 +39,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            elif op == 'mute':
                datastore.data['watching'][uuid].toggle_mute()

-            datastore.needs_write = True
+            datastore.data['watching'][uuid].commit()
            return redirect(url_for('watchlist.index', tag = active_tag_uuid))

        # Sort by last_changed and add the uuid which is usually the key..
@@ -266,6 +266,47 @@ def _jinja2_filter_seconds_precise(timestamp):

    return format(int(time.time()-timestamp), ',d')

+@app.template_filter('format_duration')
+def _jinja2_filter_format_duration(seconds):
+    """Render a duration given in seconds as text such as '5 days, 3 hours, 30 minutes'.
+
+    Units that come out as zero are omitted. A year is counted as 365 days and a
+    month as 30 days. Falsy or negative input gives the translated '0 seconds'.
+    """
+    from datetime import timedelta
+
+    if not seconds or seconds < 0:
+        return gettext('0 seconds')
+
+    span = timedelta(seconds=int(seconds))
+
+    # Peel the whole days apart, largest unit first
+    years, leftover_days = divmod(span.days, 365)
+    months, leftover_days = divmod(leftover_days, 30)
+    weeks, days = divmod(leftover_days, 7)
+    # Then the part of the final day, also largest unit first
+    hours, leftover_secs = divmod(span.seconds, 3600)
+    minutes, secs = divmod(leftover_secs, 60)
+
+    # Counts are never negative here, so truthiness means "greater than zero"
+    chunks = []
+    if years:
+        chunks.append(f"{years} {gettext('year') if years == 1 else gettext('years')}")
+    if months:
+        chunks.append(f"{months} {gettext('month') if months == 1 else gettext('months')}")
+    if weeks:
+        chunks.append(f"{weeks} {gettext('week') if weeks == 1 else gettext('weeks')}")
+    if days:
+        chunks.append(f"{days} {gettext('day') if days == 1 else gettext('days')}")
+    if hours:
+        chunks.append(f"{hours} {gettext('hour') if hours == 1 else gettext('hours')}")
+    if minutes:
+        chunks.append(f"{minutes} {gettext('minute') if minutes == 1 else gettext('minutes')}")
+    if secs or not chunks:
+        chunks.append(f"{secs} {gettext('second') if secs == 1 else gettext('seconds')}")
+
+    return ", ".join(chunks)
+
@app.template_filter('fetcher_status_icons')
 def _jinja2_filter_fetcher_status_icons(fetcher_name):
    """Get status icon HTML for a given fetcher.
@@ -703,10 +744,10 @@ def changedetection_app(config=None, datastore_o=None):
            favicon_filename = watch.get_favicon_filename()
            if favicon_filename:
                # Use cached MIME type detection
-                filepath = os.path.join(watch.watch_data_dir, favicon_filename)
+                filepath = os.path.join(watch.data_dir, favicon_filename)
                mime = get_favicon_mime_type(filepath)

-                response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
+                response = make_response(send_from_directory(watch.data_dir, favicon_filename))
                response.headers['Content-type'] = mime
                response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate
                return response
@@ -807,7 +848,7 @@ def changedetection_app(config=None, datastore_o=None):
    app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')

    # Initialize Socket.IO server conditionally based on settings
-    socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
+    socket_io_enabled = datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True)
    if socket_io_enabled and app.config.get('batch_mode'):
        socket_io_enabled = False
    if socket_io_enabled:
@@ -1,15 +1,48 @@
+"""
+Tag/Group domain model for organizing and overriding watch settings.
+
+ARCHITECTURE NOTE: Configuration Override Hierarchy
+===================================================
+
+Tags can override Watch settings when overrides_watch=True.
+Current implementation requires manual checking in processors:
+
+    for tag_uuid in watch.get('tags'):
+        tag = datastore['settings']['application']['tags'][tag_uuid]
+        if tag.get('overrides_watch'):
+            restock_settings = tag.get('restock_settings', {})
+            break
+
+With Pydantic, this would be automatic via chain resolution:
+    Watch → Tag (first with overrides_watch) → Global
+
+See: Watch.py model docstring for full Pydantic architecture explanation
+See: processors/restock_diff/processor.py:184-192 for current manual implementation
+"""

 from changedetectionio.model import watch_base
+from changedetectionio.model.persistence import EntityPersistenceMixin

+class model(EntityPersistenceMixin, watch_base):
+    """
+    Tag domain model - groups watches and can override their settings.

-class model(watch_base):
+    Tags inherit from watch_base to reuse all the same fields as Watch.
+    When overrides_watch=True, tag settings take precedence over watch settings
+    for all watches in this tag/group.
+
+    Fields:
+        overrides_watch (bool): If True, this tag's settings override watch settings
+        title (str): Display name for this tag/group
+        uuid (str): Unique identifier
+        ... (all fields from watch_base can be set as tag-level overrides)
+
+    Resolution order when overrides_watch=True:
+        Watch.field → Tag.field (if overrides_watch) → Global.field
+    """

    def __init__(self, *arg, **kw):
-        # Store datastore reference (optional for Tags, but good for consistency)
-        self.__datastore = kw.get('__datastore')
-        if kw.get('__datastore'):
-            del kw['__datastore']
-
+        # Parent class (watch_base) handles __datastore and __datastore_path
        super(model, self).__init__(*arg, **kw)

        self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
@@ -17,3 +50,7 @@ class model(watch_base):
        if kw.get('default'):
            self.update(kw['default'])
            del kw['default']
+
+    # _save_to_disk() method provided by EntityPersistenceMixin
+    # commit() and _get_commit_data() methods inherited from watch_base
+    # Tag uses default _get_commit_data() (includes all keys)
@@ -1,5 +1,29 @@
-import gc
-from copy import copy
+"""
+Watch domain model for change detection monitoring.
+
+ARCHITECTURE NOTE: Configuration Override Hierarchy
+===================================================
+
+This module implements Watch objects that inherit from dict (technical debt).
+The dream architecture would use Pydantic for:
+
+1. CHAIN RESOLUTION (Watch → Tag → Global Settings)
+   - Current: Manual resolution scattered across codebase
+   - Future: @computed_field properties with automatic resolution
+   - Examples: resolved_fetch_backend, resolved_restock_settings, etc.
+
+2. DATABASE BACKEND ABSTRACTION
+   - Current: Domain model tightly coupled to file-based JSON storage
+   - Future: Domain model (Pydantic) separate from persistence layer
+   - Enables: Easy migration to PostgreSQL, MongoDB, etc.
+
+3. TYPE SAFETY & VALIDATION
+   - Current: Dict access with no compile-time checks
+   - Future: Type hints, IDE autocomplete, validation at boundaries
+
+See class model docstring for detailed explanation and examples.
+See: processors/restock_diff/processor.py:184-192 for manual resolution example
+"""

 from blinker import signal
 from changedetectionio.validate_url import is_safe_valid_url
@@ -7,6 +31,7 @@ from changedetectionio.validate_url import is_safe_valid_url
 from changedetectionio.strtobool import strtobool
 from changedetectionio.jinja2_custom import render as jinja_render
 from . import watch_base
+from .persistence import EntityPersistenceMixin
 import os
 import re
 from pathlib import Path
@@ -103,22 +128,110 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
            raise Exception(f"Brotli compression failed for {filepath}: {e}")


-class model(watch_base):
+class model(EntityPersistenceMixin, watch_base):
+    """
+    Watch domain model for monitoring URL changes.
+
+    Inherits from watch_base (which inherits dict) - see watch_base docstring for field documentation.
+
+    ## Configuration Override Hierarchy (Chain Resolution)
+
+    The dream architecture uses a 3-level resolution chain:
+        Watch settings → Tag/Group settings → Global settings
+
+    Current implementation is MANUAL (see processor.py:184-192 for example):
+        - Processors manually check watch.get('field')
+        - Then loop through watch.tags to find first tag with overrides_watch=True
+        - Finally fall back to datastore['settings']['application']['field']
+
+    FUTURE: Pydantic-based chain resolution would enable:
+
+        ```python
+        # Instead of manual resolution in every processor:
+        restock_settings = watch.get('restock_settings', {})
+        for tag_uuid in watch.get('tags'):
+            tag = datastore['settings']['application']['tags'][tag_uuid]
+            if tag.get('overrides_watch'):
+                restock_settings = tag.get('restock_settings', {})
+                break
+
+        # Clean computed properties with automatic resolution:
+        @computed_field
+        def resolved_restock_settings(self) -> dict:
+            if self.restock_settings:
+                return self.restock_settings
+            for tag_uuid in self.tags:
+                tag = self._datastore.get_tag(tag_uuid)
+                if tag.overrides_watch and tag.restock_settings:
+                    return tag.restock_settings
+            return self._datastore.settings.restock_settings or {}
+
+        # Usage: watch.resolved_restock_settings (automatic, type-safe, tested once)
+        ```
+
+    Benefits of Pydantic migration:
+        1. Single source of truth for resolution logic (not scattered across processors)
+        2. Type safety + IDE autocomplete (watch.resolved_fetch_backend vs dict navigation)
+        3. Database backend abstraction (domain model separate from persistence)
+        4. Automatic validation at boundaries
+        5. Self-documenting via type hints
+        6. Easy to test resolution independently
+
+    Resolution chain examples that would benefit:
+        - fetch_backend: watch → tag → global (see get_fetch_backend property)
+        - notification_urls: watch → tag → global
+        - time_between_check: watch → global (see threshold_seconds)
+        - restock_settings: watch → tag (see processors/restock_diff/processor.py:184-192)
+        - history_snapshot_max_length: watch → global (see save_history_blob:550-556)
+        - All processor_config_* settings could use tag overrides
+
+    ## Database Backend Abstraction with Pydantic
+
+    Current: Watch inherits dict, tightly coupled to file-based JSON storage
+    Future: Domain model (Watch) separate from persistence layer
+
+        ```python
+        # Domain model (database-agnostic)
+        class Watch(BaseModel):
+            uuid: str
+            url: str
+            # ... validation, business logic
+
+        # Pluggable backends
+        class DataStoreBackend(ABC):
+            def save_watch(self, watch: Watch): ...
+            def load_watch(self, uuid: str) -> Watch: ...
+
+        # Implementations: FileBackend, MongoBackend, PostgresBackend, etc.
+        ```
+
+    This would enable:
+        - Easy migration between storage backends (file → postgres → mongodb)
+        - Pydantic handles serialization/deserialization automatically
+        - Domain logic stays clean (no storage concerns in Watch methods)
+
+    ## Migration Path
+
+    Given the existing codebase, an incremental migration is recommended:
+        1. Create Pydantic models alongside existing dict-based models
+        2. Add .to_pydantic() / .from_pydantic() bridge methods
+        3. Gradually migrate code to use Pydantic models
+        4. Remove dict inheritance once migration complete
+
+    See: watch_base docstring for technical debt discussion
+    See: processors/restock_diff/processor.py:184-192 for manual resolution example
+    See: Watch.py:550-556 for nested dict navigation that would become watch.resolved_*
+    """
    __newest_history_key = None
    __history_n = 0
    jitter_seconds = 0

    def __init__(self, *arg, **kw):
-        self.__datastore_path = kw.get('datastore_path')
-        if kw.get('datastore_path'):
-            del kw['datastore_path']
-
-        self.__datastore = kw.get('__datastore')
-        if not self.__datastore:
+        # Validate __datastore before calling parent (Watch requires it)
+        if not kw.get('__datastore'):
            raise ValueError("Watch object requires '__datastore' reference - cannot access global settings without it")
-        if kw.get('__datastore'):
-            del kw['__datastore']

+        # Parent class (watch_base) handles __datastore and __datastore_path
        super(model, self).__init__(*arg, **kw)

        if kw.get('default'):
@@ -146,11 +259,6 @@ class model(watch_base):
    def has_unviewed(self):
        return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2

-    def ensure_data_dir_exists(self):
-        if not os.path.isdir(self.watch_data_dir):
-            logger.debug(f"> Creating data dir {self.watch_data_dir}")
-            os.mkdir(self.watch_data_dir)
-
    @property
    def link(self):

@@ -206,7 +314,8 @@ class model(watch_base):

        # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
        # But preserve processor config files (they're configuration, not history data)
-        for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
+        # Use glob(), not rglob(), for safety: only entries directly inside data_dir are matched, never nested ones.
+        for item in pathlib.Path(str(self.data_dir)).glob("*.*"):
            # Skip processor config files
            if item.name in processor_config_files:
                continue
@@ -243,8 +352,30 @@ class model(watch_base):
    @property
    def get_fetch_backend(self):
        """
-        Like just using the `fetch_backend` key but there could be some logic
-        :return:
+        Get the fetch backend for this watch with special case handling.
+
+        CHAIN RESOLUTION OPPORTUNITY:
+        Currently returns watch.fetch_backend directly, but doesn't implement
+        Watch → Tag → Global resolution chain. With Pydantic:
+
+        @computed_field
+        def resolved_fetch_backend(self) -> str:
+            # Special case: PDFs always use html_requests
+            if self.is_pdf:
+                return 'html_requests'
+
+            # Watch override
+            if self.fetch_backend and self.fetch_backend != 'system':
+                return self.fetch_backend
+
+            # Tag override (first tag with overrides_watch=True wins)
+            for tag_uuid in self.tags:
+                tag = self._datastore.get_tag(tag_uuid)
+                if tag.overrides_watch and tag.fetch_backend:
+                    return tag.fetch_backend
+
+            # Global default
+            return self._datastore.settings.fetch_backend
        """
        # Maybe also if is_image etc?
        # This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
@@ -293,11 +424,11 @@ class model(watch_base):
        tmp_history = {}

        # In the case we are only using the watch for processing without history
-        if not self.watch_data_dir:
+        if not self.data_dir:
            return []

        # Read the history file as a dict
-        fname = os.path.join(self.watch_data_dir, self.history_index_filename)
+        fname = os.path.join(self.data_dir, self.history_index_filename)
        if os.path.isfile(fname):
            logger.debug(f"Reading watch history index for {self.get('uuid')}")
            with open(fname, "r", encoding='utf-8') as f:
@@ -310,13 +441,13 @@ class model(watch_base):
                        # Cross-platform: check for any path separator (works on Windows and Unix)
                        if os.sep not in v and '/' not in v and '\\' not in v:
                            # Relative filename only, no path separators
-                            v = os.path.join(self.watch_data_dir, v)
+                            v = os.path.join(self.data_dir, v)
                        else:
                            # It's possible that they moved the datadir on older versions
                            # So the snapshot exists but is in a different path
                            # Cross-platform: use os.path.basename instead of split('/')
                            snapshot_fname = os.path.basename(v)
-                            proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
+                            proposed_new_path = os.path.join(self.data_dir, snapshot_fname)
                            if not os.path.exists(v) and os.path.exists(proposed_new_path):
                                v = proposed_new_path

@@ -333,7 +464,7 @@ class model(watch_base):

    @property
    def has_history(self):
-        fname = os.path.join(self.watch_data_dir, self.history_index_filename)
+        fname = os.path.join(self.data_dir, self.history_index_filename)
        return os.path.isfile(fname)

    @property
@@ -439,7 +570,7 @@ class model(watch_base):
    def _write_atomic(self, dest, data, mode='wb'):
        """Write data atomically to dest using a temp file"""
        import tempfile
-        with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.watch_data_dir) as tmp:
+        with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.data_dir) as tmp:
            tmp.write(data)
            tmp.flush()
            os.fsync(tmp.fileno())
@@ -448,7 +579,7 @@ class model(watch_base):

    def history_trim(self, newest_n_items):
        from pathlib import Path
-
+        import gc
        # Sort by timestamp (key)
        sorted_items = sorted(self.history.items(), key=lambda x: int(x[0]))

@@ -465,7 +596,7 @@ class model(watch_base):
                finally:
                    logger.debug(f"[{self.get('uuid')}] Deleted {item[1]} history snapshot")
        try:
-            dest = os.path.join(self.watch_data_dir, self.history_index_filename)
+            dest = os.path.join(self.data_dir, self.history_index_filename)
            output = "\r\n".join(
                f"{k},{Path(v).name}"
                for k, v in keep_part.items()
@@ -504,7 +635,7 @@ class model(watch_base):
                ext = 'bin'

            snapshot_fname = f"{snapshot_id}.{ext}"
-            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            dest = os.path.join(self.data_dir, snapshot_fname)
            self._write_atomic(dest, contents)
            logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")

@@ -514,7 +645,7 @@ class model(watch_base):
                # Compressed text
                import brotli
                snapshot_fname = f"{snapshot_id}.txt.br"
-                dest = os.path.join(self.watch_data_dir, snapshot_fname)
+                dest = os.path.join(self.data_dir, snapshot_fname)

                if not os.path.exists(dest):
                    try:
@@ -525,16 +656,16 @@ class model(watch_base):
                        logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
                        # Fallback to uncompressed
                        snapshot_fname = f"{snapshot_id}.txt"
-                        dest = os.path.join(self.watch_data_dir, snapshot_fname)
+                        dest = os.path.join(self.data_dir, snapshot_fname)
                        self._write_atomic(dest, contents.encode('utf-8'))
            else:
                # Plain text
                snapshot_fname = f"{snapshot_id}.txt"
-                dest = os.path.join(self.watch_data_dir, snapshot_fname)
+                dest = os.path.join(self.data_dir, snapshot_fname)
                self._write_atomic(dest, contents.encode('utf-8'))

        # Append to history.txt atomically
-        index_fname = os.path.join(self.watch_data_dir, self.history_index_filename)
+        index_fname = os.path.join(self.data_dir, self.history_index_filename)
        index_line = f"{timestamp},{snapshot_fname}\n"

        with open(index_fname, 'a', encoding='utf-8') as f:
@@ -546,11 +677,13 @@ class model(watch_base):
        self.__newest_history_key = timestamp
        self.__history_n += 1

-
-        maxlen = (
-                self.get('history_snapshot_max_length')
-                or (self.__datastore and self.__datastore['settings']['application'].get('history_snapshot_max_length'))
-        )
+        # MANUAL CHAIN RESOLUTION: Watch → Global
+        # With Pydantic, this would become: maxlen = watch.resolved_history_snapshot_max_length
+        # @computed_field def resolved_history_snapshot_max_length(self) -> Optional[int]:
+        #     if self.history_snapshot_max_length: return self.history_snapshot_max_length
+        #     if tag := self._get_override_tag(): return tag.history_snapshot_max_length
+        #     return self._datastore.settings.history_snapshot_max_length
+        maxlen = self.get('history_snapshot_max_length') or self.get_global_setting('application', 'history_snapshot_max_length')

        if maxlen and self.__history_n and self.__history_n > maxlen:
            self.history_trim(newest_n_items=maxlen)
@@ -607,7 +740,7 @@ class model(watch_base):
        return not local_lines.issubset(existing_history)

    def get_screenshot(self):
-        fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
+        fname = os.path.join(self.data_dir, "last-screenshot.png")
        if os.path.isfile(fname):
            return fname

@@ -622,7 +755,7 @@ class model(watch_base):
        if not favicon_fname:
            return True
        try:
-            fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
+            fname = next(iter(glob.glob(os.path.join(self.data_dir, "favicon.*"))), None)
            logger.trace(f"Favicon file maybe found at {fname}")
            if os.path.isfile(fname):
                file_age = int(time.time() - os.path.getmtime(fname))
@@ -655,7 +788,7 @@ class model(watch_base):
            base = "favicon"
            extension = "ico"

-        fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
+        fname = os.path.join(self.data_dir, f"favicon.{extension}")

        try:
            # validate=True makes sure the string only contains valid base64 chars
@@ -702,7 +835,7 @@ class model(watch_base):
        import glob

        # Search for all favicon.* files
-        files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
+        files = glob.glob(os.path.join(self.data_dir, "favicon.*"))

        if not files:
            result = None
@@ -729,7 +862,7 @@ class model(watch_base):
        import os
        import time

-        thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
+        thumbnail_path = os.path.join(self.data_dir, "thumbnail.jpeg")
        top_trim = 500  # Pixels from top of screenshot to use

        screenshot_path = self.get_screenshot()
@@ -780,7 +913,7 @@ class model(watch_base):
            return None

    def __get_file_ctime(self, filename):
-        fname = os.path.join(self.watch_data_dir, filename)
+        fname = os.path.join(self.data_dir, filename)
        if os.path.isfile(fname):
            return int(os.path.getmtime(fname))
        return False
@@ -805,14 +938,9 @@ class model(watch_base):
    def snapshot_error_screenshot_ctime(self):
        return self.__get_file_ctime('last-error-screenshot.png')

-    @property
-    def watch_data_dir(self):
-        # The base dir of the watch data
-        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
-
    def get_error_text(self):
        """Return the text saved from a previous request that resulted in a non-200 error"""
-        fname = os.path.join(self.watch_data_dir, "last-error.txt")
+        fname = os.path.join(self.data_dir, "last-error.txt")
        if os.path.isfile(fname):
            with open(fname, 'r', encoding='utf-8') as f:
                return f.read()
@@ -820,7 +948,7 @@ class model(watch_base):

    def get_error_snapshot(self):
        """Return path to the screenshot that resulted in a non-200 error"""
-        fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
+        fname = os.path.join(self.data_dir, "last-error-screenshot.png")
        if os.path.isfile(fname):
            return fname
        return False
@@ -844,6 +972,37 @@ class model(watch_base):
    def toggle_mute(self):
        self['notification_muted'] ^= True

+    def _get_commit_data(self):
+        """
+        Build the dictionary that will be written out when this watch is committed.
+
+        The watch's key/value pairs are copied shallowly (under the datastore
+        lock when the datastore exposes one) and then deep-copied key by key.
+        Keys starting with ``processor_config_`` are left out, and
+        ``browser_steps`` is replaced by an empty list when
+        ``self.has_browser_steps`` is falsy.
+
+        Returns:
+            dict: Deep-copied watch data, ready for serialization.
+        """
+        import copy
+
+        # Use the datastore's lock, if it has one, while taking the shallow copy
+        datastore = self._datastore
+        lock = getattr(datastore, 'lock', None) if datastore else None
+
+        if not lock:
+            snapshot = dict(self)
+        else:
+            with lock:
+                snapshot = dict(self)
+
+        # Deep copy everything except the processor config keys (stored separately)
+        watch_dict = {}
+        for key, value in snapshot.items():
+            if key.startswith('processor_config_'):
+                continue
+            watch_dict[key] = copy.deepcopy(value)
+
+        # No meaningful browser steps -> persist an empty list
+        if not self.has_browser_steps:
+            watch_dict['browser_steps'] = []
+
+        return watch_dict
+
+    # _save_to_disk() method provided by EntityPersistenceMixin
+    # commit() method inherited from watch_base
+
+
    def extra_notification_token_values(self):
        # Used for providing extra tokens
        # return {'widget': 555}
@@ -873,7 +1032,7 @@ class model(watch_base):
                        if not csv_writer:
                            # A file on the disk can be transferred much faster via flask than a string reply
                            csv_output_filename = f"report-{self.get('uuid')}.csv"
-                            f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
+                            f = open(os.path.join(self.data_dir, csv_output_filename), 'w')
                            # @todo some headers in the future
                            #fieldnames = ['Epoch seconds', 'Date']
                            csv_writer = csv.writer(f,
@@ -915,7 +1074,7 @@ class model(watch_base):

    def save_error_text(self, contents):
        self.ensure_data_dir_exists()
-        target_path = os.path.join(self.watch_data_dir, "last-error.txt")
+        target_path = os.path.join(self.data_dir, "last-error.txt")
        with open(target_path, 'w', encoding='utf-8') as f:
            f.write(contents)

@@ -924,9 +1083,9 @@ class model(watch_base):
        import zlib

        if as_error:
-            target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate")
+            target_path = os.path.join(str(self.data_dir), "elements-error.deflate")
        else:
-            target_path = os.path.join(str(self.watch_data_dir), "elements.deflate")
+            target_path = os.path.join(str(self.data_dir), "elements.deflate")

        self.ensure_data_dir_exists()

@@ -941,9 +1100,9 @@ class model(watch_base):
    def save_screenshot(self, screenshot: bytes, as_error=False):

        if as_error:
-            target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
+            target_path = os.path.join(self.data_dir, "last-error-screenshot.png")
        else:
-            target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
+            target_path = os.path.join(self.data_dir, "last-screenshot.png")

        self.ensure_data_dir_exists()

@@ -954,7 +1113,7 @@ class model(watch_base):

    def get_last_fetched_text_before_filters(self):
        import brotli
-        filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
+        filepath = os.path.join(self.data_dir, 'last-fetched.br')

        if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
            # If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
@@ -969,13 +1128,13 @@ class model(watch_base):

    def save_last_text_fetched_before_filters(self, contents):
        import brotli
-        filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
+        filepath = os.path.join(self.data_dir, 'last-fetched.br')
        _brotli_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)

    def save_last_fetched_html(self, timestamp, contents):
        self.ensure_data_dir_exists()
        snapshot_fname = f"{timestamp}.html.br"
-        filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+        filepath = os.path.join(self.data_dir, snapshot_fname)
        _brotli_save(contents, filepath, mode=None, fallback_uncompressed=True)
        self._prune_last_fetched_html_snapshots()

@@ -983,7 +1142,7 @@ class model(watch_base):
        import brotli

        snapshot_fname = f"{timestamp}.html.br"
-        filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+        filepath = os.path.join(self.data_dir, snapshot_fname)
        if os.path.isfile(filepath):
            with open(filepath, 'rb') as f:
                return (brotli.decompress(f.read()).decode('utf-8'))
@@ -998,7 +1157,7 @@ class model(watch_base):

        for index, timestamp in enumerate(dates):
            snapshot_fname = f"{timestamp}.html.br"
-            filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+            filepath = os.path.join(self.data_dir, snapshot_fname)

            # Keep only the first 2
            if index > 1 and os.path.isfile(filepath):
@@ -1009,7 +1168,7 @@ class model(watch_base):
    def get_browsersteps_available_screenshots(self):
        "For knowing which screenshots are available to show the user in BrowserSteps UI"
        available = []
-        for f in Path(self.watch_data_dir).glob('step_before-*.jpeg'):
+        for f in Path(self.data_dir).glob('step_before-*.jpeg'):
            step_n=re.search(r'step_before-(\d+)', f.name)
            if step_n:
                available.append(step_n.group(1))
@@ -2,12 +2,169 @@ import os
 import uuid

 from changedetectionio import strtobool
+from .persistence import EntityPersistenceMixin, _determine_entity_type
+
+__all__ = ['EntityPersistenceMixin', 'watch_base']
+
 USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
 CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'

+
 class watch_base(dict):
+    """
+    Base watch domain model (inherits from dict for backward compatibility).
+
+    WARNING: This class inherits from dict, which violates proper encapsulation.
+    Dict inheritance is legacy technical debt that should be refactored to a proper
+    domain model (e.g., Pydantic BaseModel) for better type safety and validation.
+
+    TODO: Migrate to Pydantic BaseModel for:
+          - Type safety and IDE autocomplete
+          - Automatic validation
+          - Clear separation between domain model and serialization
+          - Database backend abstraction (file → postgres → mongodb)
+          - Configuration override chain resolution (Watch → Tag → Global)
+          - Immutability options
+          - Better testing
+
+    CHAIN RESOLUTION ARCHITECTURE:
+        The dream is a 3-level override hierarchy:
+            Watch settings → Tag/Group settings → Global settings
+
+        Current implementation: MANUAL resolution scattered across codebase
+        - Processors manually check watch.get('field')
+        - Loop through tags to find overrides_watch=True
+        - Fall back to datastore['settings']['application']['field']
+
+        Pydantic implementation: AUTOMATIC resolution via @computed_field
+        - Single source of truth for each setting's resolution logic
+        - Type-safe, testable, self-documenting
+        - Example: watch.resolved_fetch_backend (instead of nested dict navigation)
+
+        See: Watch.py model docstring for detailed Pydantic architecture plan
+        See: Tag.py model docstring for tag override explanation
+        See: processors/restock_diff/processor.py:184-192 for current manual example
+
+    Core Fields:
+        uuid (str): Unique identifier for this watch (auto-generated)
+        url (str): Target URL to monitor for changes
+        title (str|None): Custom display name (overrides page_title if set)
+        page_title (str|None): Title extracted from <title> tag of monitored page
+        tags (List[str]): List of tag UUIDs for categorization
+        tag (str): DEPRECATED - Old single-tag system, use tags instead
+
+    Check Configuration:
+        processor (str): Processor type ('text_json_diff', 'restock_diff', etc.)
+        fetch_backend (str): Fetcher to use ('system', 'html_requests', 'playwright', etc.)
+        method (str): HTTP method ('GET', 'POST', etc.)
+        headers (dict): Custom HTTP headers to send
+        proxy (str|None): Preferred proxy server
+        paused (bool): Whether change detection is paused
+
+    Scheduling:
+        time_between_check (dict): Check interval {'weeks': int, 'days': int, 'hours': int, 'minutes': int, 'seconds': int}
+        time_between_check_use_default (bool): Use global default interval if True
+        time_schedule_limit (dict): Weekly schedule limiting when checks can run
+            Structure: {
+                'enabled': bool,
+                'monday/tuesday/.../sunday': {
+                    'enabled': bool,
+                    'start_time': str ('HH:MM'),
+                    'duration': {'hours': str, 'minutes': str}
+                }
+            }
+
+    Content Filtering:
+        include_filters (List[str]): CSS/XPath selectors to extract content
+        subtractive_selectors (List[str]): Selectors to remove from content
+        ignore_text (List[str]): Text patterns to ignore in change detection
+        trigger_text (List[str]): Text/regex that must be present to trigger change
+        text_should_not_be_present (List[str]): Text that should NOT be present
+        extract_text (List[str]): Regex patterns to extract specific text after filtering
+
+    Text Processing:
+        trim_text_whitespace (bool): Strip leading/trailing whitespace
+        sort_text_alphabetically (bool): Sort lines alphabetically before comparison
+        remove_duplicate_lines (bool): Remove duplicate lines
+        check_unique_lines (bool): Compare against all history for unique lines
+        strip_ignored_lines (bool|None): Remove lines matching ignore patterns
+
+    Change Detection Filters:
+        filter_text_added (bool): Include added text in change detection
+        filter_text_removed (bool): Include removed text in change detection
+        filter_text_replaced (bool): Include replaced text in change detection
+
+    Browser Automation:
+        browser_steps (List[dict]): Browser automation steps for JS-heavy sites
+        browser_steps_last_error_step (int|None): Last step that caused error
+        webdriver_delay (int|None): Seconds to wait after page load
+        webdriver_js_execute_code (str|None): JavaScript to execute before extraction
+
+    Restock Detection:
+        in_stock_only (bool): Only trigger on in-stock transitions
+        follow_price_changes (bool): Monitor price changes
+        has_ldjson_price_data (bool|None): Whether page has LD-JSON price data
+        track_ldjson_price_data (str|None): Track LD-JSON price data ('ACCEPT', 'REJECT', None)
+        price_change_threshold_percent (float|None): Minimum price change % to trigger
+
+    Notifications:
+        notification_urls (List[str]): Apprise URLs for notifications
+        notification_title (str|None): Custom notification title template
+        notification_body (str|None): Custom notification body template
+        notification_format (str): Notification format (e.g., 'System default', 'Text', 'HTML')
+        notification_muted (bool): Disable notifications for this watch
+        notification_screenshot (bool): Include screenshot in notifications
+        notification_alert_count (int): Number of notifications sent
+        last_notification_error (str|None): Last notification error message
+        body (str|None): DEPRECATED? Legacy notification body field
+        filter_failure_notification_send (bool): Send notification on filter failures
+
+    History & State:
+        date_created (int|None): Unix timestamp of watch creation
+        last_checked (int): Unix timestamp of last check
+        last_viewed (int): History snapshot key of last user view
+        last_error (str|bool): Last error message or False if no error
+        check_count (int): Total number of checks performed
+        fetch_time (float): Duration of last fetch in seconds
+        consecutive_filter_failures (int): Counter for consecutive filter match failures
+        previous_md5 (str|bool): MD5 hash of previous content
+        previous_md5_before_filters (str|bool): MD5 hash before filters applied
+        history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global)
+
+    Conditions:
+        conditions (dict): Custom conditions for change detection logic
+        conditions_match_logic (str): Logic operator ('ALL', 'ANY') for conditions
+
+    Metadata:
+        content-type (str|None): Content-Type from last fetch
+        remote_server_reply (str|None): Server header from last response
+        ignore_status_codes (List[int]|None): HTTP status codes to ignore
+        use_page_title_in_list (bool|None): Display page title in watch list (None = use system default)
+
+    Instance Attributes (not serialized):
+        _datastore: Reference to parent DataStore (taken from the '__datastore' constructor kwarg)
+        data_dir: Filesystem path for this watch's data directory
+
+    Notes:
+        - Many fields default to None to distinguish "not set" from "set to default"
+        - When field is None, system-level defaults are used
+        - Processor-specific configs (e.g., processor_config_*) are NOT stored in watch.json
+          They are stored in separate {processor_name}.json files
+        - This class is used for both Watch and Tag objects (tags reuse the structure)
+    """

    def __init__(self, *arg, **kw):
+        # Store datastore reference (common to Watch and Tag)
+        # Use single underscore to avoid name mangling issues in subclasses
+        self._datastore = kw.get('__datastore')
+        if kw.get('__datastore'):
+            del kw['__datastore']
+
+        # Store datastore_path (common to Watch and Tag)
+        self._datastore_path = kw.get('datastore_path')
+        if kw.get('datastore_path'):
+            del kw['datastore_path']
+
        self.update({
            # Custom notification content
            # Re #110, so then if this is set to None, we know to use the default value instead
@@ -177,8 +334,10 @@ class watch_base(dict):
                    attr_value = getattr(self, attr_name)

                    # Special handling: Share references to large objects instead of copying
-                    # Examples: __datastore, __app_reference, __global_settings, etc.
-                    if attr_name.endswith('__datastore') or attr_name.endswith('__app'):
+                    # Examples: _datastore, __datastore, __app_reference, __global_settings, etc.
+                    if (attr_name == '_datastore' or
+                        attr_name.endswith('__datastore') or
+                        attr_name.endswith('__app')):
                        # Share the reference (don't copy!) to prevent memory leaks
                        setattr(new_obj, attr_name, attr_value)
                    # Skip cache attributes - let them regenerate on demand
@@ -208,7 +367,8 @@ class watch_base(dict):
                try:
                    attr_value = getattr(self, attr_name)
                    # Exclude large reference objects and caches from serialization
-                    if not (attr_name.endswith('__datastore') or
+                    if not (attr_name == '_datastore' or
+                           attr_name.endswith('__datastore') or
                           attr_name.endswith('__app') or
                           'cache' in attr_name.lower() or
                           callable(attr_value)):
@@ -236,4 +396,124 @@ class watch_base(dict):

        # Restore instance attributes
        for attr_name, attr_value in metadata.items():
-            setattr(self, attr_name, attr_value)
+            setattr(self, attr_name, attr_value)
+
+    @property
+    def data_dir(self):
+        """
+        Directory holding this watch's/tag's files, derived from the UUID.
+
+        Shared by Watch and Tag objects.
+
+        Returns:
+            str|None: ``<datastore_path>/<uuid>`` when a datastore path is set,
+            otherwise None.
+        """
+        base_path = self._datastore_path
+        if not base_path:
+            return None
+        return os.path.join(base_path, self['uuid'])
+
+    def ensure_data_dir_exists(self):
+        """
+        Create the data directory if it doesn't exist.
+
+        Common method for both Watch and Tag objects.
+
+        Only the final path component is created (``os.mkdir``), so the parent
+        directory must already exist. If something else creates the same
+        directory between the existence check and the ``mkdir`` call, that is
+        tolerated instead of raising ``FileExistsError``.
+
+        Raises:
+            FileExistsError: If the path exists but is not a directory.
+            OSError: For any other failure reported by ``os.mkdir``.
+        """
+        from loguru import logger
+
+        data_dir = self.data_dir
+        if os.path.isdir(data_dir):
+            return
+
+        logger.debug(f"> Creating data dir {data_dir}")
+        try:
+            os.mkdir(data_dir)
+        except FileExistsError:
+            # Check-then-create is racy: the directory may have appeared after
+            # the isdir() check above. Only swallow the error when the path
+            # really is a directory now; a plain file in the way is still an error.
+            if not os.path.isdir(data_dir):
+                raise
+
+    def get_global_setting(self, *path):
+        """
+        Look up a value under the datastore's global ``settings`` mapping.
+
+        Args:
+            *path: Successive keys to follow below ``settings``
+                   (e.g., 'application', 'history_snapshot_max_length')
+
+        Returns:
+            The value found at the end of the path, or None when there is no
+            datastore or a lookup along the way raises KeyError/TypeError.
+
+        Example:
+            maxlen = self.get_global_setting('application', 'history_snapshot_max_length')
+        """
+        if not self._datastore:
+            return None
+
+        try:
+            node = self._datastore['settings']
+            for segment in path:
+                node = node[segment]
+        except (KeyError, TypeError):
+            # Missing key, or an intermediate value that cannot be indexed
+            return None
+        return node
+
+    def _get_commit_data(self):
+        """
+        Produce the data to be written on commit (subclasses may override).
+
+        Returns:
+            dict: A deep copy of this object's key/value pairs
+                  (subclasses may filter keys, e.g. Watch drops processor_config_*)
+        """
+        import copy
+
+        # Use the datastore's lock, if it has one, while taking the shallow copy
+        datastore = self._datastore
+        lock = getattr(datastore, 'lock', None) if datastore else None
+
+        if not lock:
+            snapshot = dict(self)
+        else:
+            with lock:
+                snapshot = dict(self)
+
+        # The deep copy runs after the lock is released, keeping the locked section short
+        result = {}
+        for key, value in snapshot.items():
+            result[key] = copy.deepcopy(value)
+        return result
+
+    def _save_to_disk(self, data_dict, uuid):
+        """
+        Save data to disk (must be implemented by subclasses).
+
+        This base version is only a placeholder that always raises; commit()
+        calls it with the result of _get_commit_data() and the entity's UUID.
+
+        Args:
+            data_dict: Dictionary to save
+            uuid: UUID of the entity being saved
+
+        Raises:
+            NotImplementedError: If subclass doesn't implement
+        """
+        raise NotImplementedError("Subclass must implement _save_to_disk()")
+
+    def commit(self):
+        """
+        Save this watch/tag immediately to disk using atomic write.
+
+        Common commit logic for Watch and Tag objects.
+        Subclasses override _get_commit_data() and _save_to_disk() for specifics.
+
+        Fire-and-forget: Logs errors but does not raise exceptions.
+        Data remains in memory even if save fails, so next commit will retry.
+
+        Returns:
+            None
+        """
+        from loguru import logger
+
+        entity_type = self.__class__.__name__
+
+        # Validate the UUID first: data_dir is computed from self['uuid'], so
+        # evaluating it on an object without a 'uuid' key would raise KeyError
+        # and break the "never raises" contract of this method.
+        uuid = self.get('uuid')
+        if not uuid:
+            logger.error(f"Cannot commit {entity_type} without UUID")
+            return
+
+        if not self.data_dir:
+            logger.error(f"Cannot commit {entity_type} {uuid} without datastore_path")
+            return
+
+        # Get data from subclass (may filter keys)
+        try:
+            data_dict = self._get_commit_data()
+        except Exception as e:
+            logger.error(f"Failed to prepare commit data for {uuid}: {e}")
+            return
+
+        # Save to disk via subclass implementation
+        try:
+            # Entity type comes from the defining module's name
+            # (Watch.py -> watch, Tag.py -> tag); used here only for the log line
+            entity_type = _determine_entity_type(self.__class__)
+            filename = f"{entity_type}.json"
+            self._save_to_disk(data_dict, uuid)
+            logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
+        except Exception as e:
+            logger.error(f"Failed to commit {uuid}: {e}")
@@ -0,0 +1,84 @@
+"""
+Entity persistence mixin for Watch and Tag models.
+
+Provides file-based persistence using atomic writes.
+"""
+
+import functools
+import inspect
+
+
+@functools.lru_cache(maxsize=None)
+def _determine_entity_type(cls):
+    """
+    Derive the entity type name from the modules in a class's MRO.
+
+    The first class in the MRO whose module lives under
+    ``changedetectionio.model.`` decides the result: the last dotted component
+    of that module name, lower-cased. Results are memoized per class.
+
+    Args:
+        cls: The class to inspect
+
+    Returns:
+        str: Entity type ('watch', 'tag', etc.)
+
+    Raises:
+        ValueError: If no class in the MRO comes from changedetectionio.model.*
+    """
+    model_modules = (
+        klass.__module__
+        for klass in inspect.getmro(cls)
+        if klass.__module__.startswith('changedetectionio.model.')
+    )
+    first_match = next(model_modules, None)
+
+    if first_match is None:
+        raise ValueError(
+            f"Cannot determine entity type for {cls.__module__}.{cls.__name__}. "
+            f"Entity must inherit from a class in changedetectionio.model (Watch or Tag)."
+        )
+
+    # "changedetectionio.model.Watch" -> "watch"
+    return first_match.rpartition('.')[2].lower()
+
+
+class EntityPersistenceMixin:
+    """
+    File-persistence mixin for watch_base subclasses (Watch, Tag, etc.).
+
+    Supplies the _save_to_disk() hook that watch_base.commit() calls.
+    The target filename and the size limit are chosen from the entity type,
+    which is derived from the class hierarchy.
+
+    Usage:
+        class model(EntityPersistenceMixin, watch_base):  # in Watch.py
+            pass
+
+        class model(EntityPersistenceMixin, watch_base):  # in Tag.py
+            pass
+    """
+
+    def _save_to_disk(self, data_dict, uuid):
+        """
+        Write the entity's data via save_entity_atomic().
+
+        Concrete implementation of the hook declared on watch_base.
+
+        Args:
+            data_dict: Dictionary to save
+            uuid: UUID of the entity, forwarded to save_entity_atomic()
+
+        Raises:
+            ValueError: If entity type cannot be determined from class hierarchy
+        """
+        # Deferred import: importing at module load time would be circular
+        from changedetectionio.store.file_saving_datastore import save_entity_atomic
+
+        # Resolved per class (the helper caches its result), not per instance
+        kind = _determine_entity_type(self.__class__)
+
+        # Watches get a 10 MB limit; every other entity type gets 1 MB
+        if kind == 'watch':
+            size_cap_mb = 10
+        else:
+            size_cap_mb = 1
+
+        save_entity_atomic(
+            self.data_dir,
+            uuid,
+            data_dict,
+            filename=f'{kind}.json',
+            entity_type=kind,
+            max_size_mb=size_cap_mb
+        )
@@ -193,12 +193,12 @@ class difference_detection_processor():
        import os

        watch = self.datastore.data['watching'].get(self.watch_uuid)
-        watch_data_dir = watch.watch_data_dir
+        data_dir = watch.data_dir

-        if not watch_data_dir:
+        if not data_dir:
            return {}

-        filepath = os.path.join(watch_data_dir, filename)
+        filepath = os.path.join(data_dir, filename)

        if not os.path.isfile(filepath):
            return {}
@@ -223,16 +223,16 @@ class difference_detection_processor():
        import os

        watch = self.datastore.data['watching'].get(self.watch_uuid)
-        watch_data_dir = watch.watch_data_dir
+        data_dir = watch.data_dir

-        if not watch_data_dir:
-            logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
+        if not data_dir:
+            logger.warning(f"Cannot save extra watch config {filename}: no data_dir")
            return

        # Ensure directory exists
        watch.ensure_data_dir_exists()

-        filepath = os.path.join(watch_data_dir, filename)
+        filepath = os.path.join(data_dir, filename)

        try:
            # If merge is enabled, read existing data first
@@ -414,7 +414,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)

    # Load historical data if available (for charts/visualization)
    comparison_data = {}
-    comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
+    comparison_config_path = os.path.join(watch.data_dir, "visual_comparison_data.json")
    if os.path.isfile(comparison_config_path):
        try:
            with open(comparison_config_path, 'r') as f:
@@ -90,7 +90,7 @@ def on_config_save(watch, processor_config, datastore):
            processor_config['auto_track_region'] = False

            # Remove old template file if exists
-            template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
+            template_path = os.path.join(watch.data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
            if os.path.exists(template_path):
                os.remove(template_path)
                logger.debug(f"Removed old template file: {template_path}")
@@ -193,18 +193,17 @@ class perform_site_check(difference_detection_processor):


        itemprop_availability = {}
+        multiple_prices_found = False

        # Try built-in extraction first, this will scan metadata in the HTML
        try:
            itemprop_availability = get_itemprop_availability(self.fetcher.content)
        except MoreThanOnePriceFound as e:
-            # Add the real data
-            raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
-                                     url=watch.get('url'),
-                                     status_code=self.fetcher.get_last_status_code(),
-                                     screenshot=self.fetcher.screenshot,
-                                     xpath_data=self.fetcher.xpath_data
-                                     )
+            # Don't raise immediately - let plugins try to handle this case
+            # Plugins might be able to determine which price is correct
+            logger.warning(f"Built-in detection found multiple prices on {watch.get('url')}, will try plugin override")
+            multiple_prices_found = True
+            itemprop_availability = {}

        # If built-in extraction didn't get both price AND availability, try plugin override
        # Only check plugin if this watch is using a fetcher that might provide better data
@@ -216,9 +215,21 @@ class perform_site_check(difference_detection_processor):
            from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
            fetcher_name = watch.get('fetch_backend', 'html_requests')

-            # Only try plugin override if not using system default (which might be anything)
-            if fetcher_name and fetcher_name != 'system':
-                logger.debug("Calling extra plugins for getting item price/availability")
+            # Resolve 'system' to the actual fetcher being used
+            # This allows plugins to work even when watch uses "system settings default"
+            if fetcher_name == 'system':
+                # Get the actual fetcher that was used (from self.fetcher)
+                # Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver')
+                actual_fetcher = type(self.fetcher).__name__
+                if 'html_requests' in actual_fetcher.lower():
+                    fetcher_name = 'html_requests'
+                elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower():
+                    fetcher_name = 'html_webdriver'
+                logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}")
+
+            # Try plugin override - plugins can decide if they support this fetcher
+            if fetcher_name:
+                logger.debug(f"Calling extra plugins for getting item price/availability (fetcher: {fetcher_name})")
                plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)

                if plugin_availability:
@@ -233,6 +244,16 @@ class perform_site_check(difference_detection_processor):
                if not plugin_availability:
                    logger.debug("No item price/availability from plugins")

+        # If we had multiple prices and plugins also failed, NOW raise the exception
+        if multiple_prices_found and not itemprop_availability.get('price'):
+            raise ProcessorException(
+                message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
+                url=watch.get('url'),
+                status_code=self.fetcher.get_last_status_code(),
+                screenshot=self.fetcher.screenshot,
+                xpath_data=self.fetcher.xpath_data
+            )
+
        # Something valid in get_itemprop_availability() by scraping metadata ?
        if itemprop_availability.get('price') or itemprop_availability.get('availability'):
            # Store for other usage
@@ -55,7 +55,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):

    tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))

-    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
+    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.data_dir):
        # Splice in the temporary stuff from the form
        form = forms.processor_text_json_diff_form(formdata=form_data if request.method == 'POST' else None,
                                                   data=form_data
@@ -68,7 +68,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
        blank_watch_no_filters['url'] = tmp_watch.get('url')

        latest_filename = next(reversed(tmp_watch.history))
-        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
+        html_fname = os.path.join(tmp_watch.data_dir, f"{latest_filename}.html.br")
        with open(html_fname, 'rb') as f:
            decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')

@@ -184,7 +184,8 @@ $(document).ready(function() {
        }
        // If it's a button in a form, submit the form
        else if ($element.is('button')) {
-          $element.closest('form').submit();
+          // Use requestSubmit() to include the button's name/value in the form data
+          $element.closest('form')[0].requestSubmit($element[0]);
        }
      }
    };
@@ -33,9 +33,8 @@ except ImportError:
 from ..processors import get_custom_watch_obj_for_processor

 # Import the base class and helpers
-from .file_saving_datastore import FileSavingDataStore, load_all_watches, save_watch_atomic, save_json_atomic
+from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_json_atomic
 from .updates import DatastoreUpdatesMixin
-from .legacy_loader import has_legacy_datastore

 # Because the server will run as a daemon and won't know the URL for notification links when firing off a notification
 BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
@@ -56,9 +55,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        # Should only be active for docker
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
        self.datastore_path = datastore_path
-        self.needs_write = False
        self.start_time = time.time()
-        self.stop_thread = False
        self.save_version_copy_json_db(version_tag)
        self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag)

@@ -80,7 +77,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.")
            copyfile(db_path, db_path_version_backup)

-    def _load_settings(self):
+    def _load_settings(self, filename="changedetection.json"):
        """
        Load settings from storage.

@@ -89,7 +86,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        Returns:
            dict: Settings data loaded from storage
        """
-        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
+        changedetection_json = os.path.join(self.datastore_path, filename)

        logger.info(f"Loading settings from {changedetection_json}")

@@ -124,11 +121,23 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            if 'application' in settings_data['settings']:
                self.__data['settings']['application'].update(settings_data['settings']['application'])

+        # Mostly for the old format, which kept the watches in the single url-watches.json file;
+        # it can't hurt to leave it here.
+        if 'watching' in settings_data:
+            self.__data['watching'].update(settings_data['watching'])
+
    def _rehydrate_tags(self):
-        """Rehydrate tag entities from stored data."""
+        """
+        Rehydrate tag entities from stored data into Tag objects with restock_diff processor.
+
+        Each entry of settings.application.tags is replaced in place by a Tag built
+        from it. The uuid stored inside the tag is forced to the key it is filed
+        under, matching what rehydrate_entity() does and what this method did
+        while it still delegated to rehydrate_entity().
+        """
+        from ..model import Tag
+
        for uuid, tag in self.__data['settings']['application']['tags'].items():
-            self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(
-                uuid, tag, processor_override='restock_diff'
+            # Keep the embedded uuid identical to the dict key (as rehydrate_entity() does)
+            tag['uuid'] = uuid
+            # Force processor to restock_diff for override functionality (technical debt)
+            tag['processor'] = 'restock_diff'
+
+            self.__data['settings']['application']['tags'][uuid] = Tag.model(
+                datastore_path=self.datastore_path,
+                __datastore=self.__data,
+                default=tag
            )
            logger.info(f"Tag: {uuid} {tag['title']}")

@@ -141,25 +150,34 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        logger.info(f"Rehydrating {watch_count} watches...")
        watching_rehydrated = {}
        for uuid, watch_dict in self.__data.get('watching', {}).items():
-            watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
+            if isinstance(watch_dict, dict):
+                watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
+            else:
+                logger.error(f"Watch UUID {uuid} already rehydrated")
+
        self.__data['watching'] = watching_rehydrated
        logger.success(f"Rehydrated {watch_count} watches into Watch objects")


-    def _load_state(self):
+    def _load_state(self, main_settings_filename="changedetection.json"):
        """
        Load complete datastore state from storage.

-        Orchestrates loading of settings and watches using polymorphic methods.
+        Orchestrates loading of settings, watches, and tags using polymorphic methods.
+
+        Args:
+            main_settings_filename: Name of the settings file inside the datastore
+                directory. reload_state() passes "url-watches.json" when it loads
+                a legacy datastore.
        """
        # Load settings
-        settings_data = self._load_settings()
+        settings_data = self._load_settings(filename=main_settings_filename)
        self._apply_settings(settings_data)

-        # Load watches (polymorphic - parent class method)
+        # Load watches, scan them from the disk
        self._load_watches()
+        # Turn the collected watch dicts into Watch objects
+        self._rehydrate_watches()

-        # Rehydrate tags
+        # Load tags from individual tag.json files
+        # These will override any tags in settings (migration path)
+        self._load_tags()
+
+        # Rehydrate any remaining tags from settings (legacy/fallback)
        self._rehydrate_tags()

    def reload_state(self, datastore_path, include_default_watches, version_tag):
@@ -191,88 +209,73 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

        # Check if datastore already exists
        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
+        changedetection_json_old_schema = os.path.join(self.datastore_path, "url-watches.json")

        if os.path.exists(changedetection_json):
-            # Load existing datastore (changedetection.json + watch.json files)
-            logger.info("Loading existing datastore")
-            try:
-                self._load_state()
-            except Exception as e:
-                logger.critical(f"Failed to load datastore: {e}")
-                raise
-
            # Run schema updates if needed
            # Pass current schema version from loaded datastore (defaults to 0 if not set)
+            # Load existing datastore (changedetection.json + watch.json files)
+            logger.info("Loading existing datastore")
+            self._load_state()
+            current_schema = self.data['settings']['application'].get('schema_version', 0)
+            self.run_updates(current_schema_version=current_schema)
+
+        # Legacy datastore detected - trigger migration; this also works when the stored schema version is far older than the migration step.
+        elif os.path.exists(changedetection_json_old_schema):
+
+            logger.critical(f"Legacy datastore detected at {changedetection_json_old_schema}, loading and running updates")
+            self._load_state(main_settings_filename="url-watches.json")
+            # update 26 will load the whole old config from disk to __data
            current_schema = self.__data['settings']['application'].get('schema_version', 0)
            self.run_updates(current_schema_version=current_schema)
+            # Probably tags were also shifted to disk and many other changes, so best to reload here.
+            self._load_state()

        else:
            # No datastore yet (neither changedetection.json nor url-watches.json) - fresh install
-            # Generate app_guid FIRST (required for all operations)
-            if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
-                self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
-            else:
-                self.__data['app_guid'] = str(uuid_builder.uuid4())
+            self.init_fresh_install(include_default_watches=include_default_watches,
+                                    version_tag=version_tag)

-            # Generate RSS access token
-            self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
+    def init_fresh_install(self, include_default_watches, version_tag):
+        # Generate app_guid FIRST (required for all operations)
+        if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
+            self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
+        else:
+            self.__data['app_guid'] = str(uuid_builder.uuid4())

-            # Generate API access token
-            self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
+        # Generate RSS access token
+        self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)

-            # Check if legacy datastore exists (url-watches.json)
-            if has_legacy_datastore(self.datastore_path):
-                # Legacy datastore detected - trigger migration
-                logger.critical(f"Legacy datastore detected at {self.datastore_path}/url-watches.json")
-                logger.critical("Migration will be triggered via update_26")
+        # Generate API access token
+        self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
+        logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")

-                # Load the legacy datastore
-                from .legacy_loader import load_legacy_format
-                legacy_path = os.path.join(self.datastore_path, "url-watches.json")
-                legacy_data = load_legacy_format(legacy_path)
+        # Set schema version to latest (no updates needed)
+        latest_update_available = self.get_updates_available().pop()
+        logger.info(f"Marking fresh install to schema version {latest_update_available}")
+        self.__data['settings']['application']['schema_version'] = latest_update_available

-                if not legacy_data:
-                    raise Exception("Failed to load legacy datastore from url-watches.json")
+        # Add default watches if requested
+        if include_default_watches:
+            self.add_watch(
+                url='https://news.ycombinator.com/',
+                tag='Tech news',
+                extras={'fetch_backend': 'html_requests'}
+            )
+            self.add_watch(
+                url='https://changedetection.io/CHANGELOG.txt',
+                tag='changedetection.io',
+                extras={'fetch_backend': 'html_requests'}
+            )

-                # Store the loaded data
-                self.__data = legacy_data
-
-                # CRITICAL: Rehydrate watches from dicts into Watch objects
-                # This ensures watches have their methods available during migration
-                self._rehydrate_watches()
-
-                # update_26 will save watches to individual files and create changedetection.json
-                # Next startup will load from new format normally
-                self.run_updates()
+        # Create changedetection.json immediately
+        try:
+            self._save_settings()
+            logger.info("Created changedetection.json for new datastore")
+        except Exception as e:
+            logger.error(f"Failed to create initial changedetection.json: {e}")


-            else:
-                # Fresh install - create new datastore
-                logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
-
-                # Set schema version to latest (no updates needed)
-                updates_available = self.get_updates_available()
-                self.__data['settings']['application']['schema_version'] = updates_available.pop() if updates_available else 26
-
-                # Add default watches if requested
-                if include_default_watches:
-                    self.add_watch(
-                        url='https://news.ycombinator.com/',
-                        tag='Tech news',
-                        extras={'fetch_backend': 'html_requests'}
-                    )
-                    self.add_watch(
-                        url='https://changedetection.io/CHANGELOG.txt',
-                        tag='changedetection.io',
-                        extras={'fetch_backend': 'html_requests'}
-                    )
-
-                # Create changedetection.json immediately
-                try:
-                    self._save_settings()
-                    logger.info("Created changedetection.json for new datastore")
-                except Exception as e:
-                    logger.error(f"Failed to create initial changedetection.json: {e}")

        # Set version tag
        self.__data['version_tag'] = version_tag
@@ -286,19 +289,19 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
                self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
            else:
                self.__data['app_guid'] = str(uuid_builder.uuid4())
-            self.mark_settings_dirty()
+            self.commit()

        # Ensure RSS access token exists
        if not self.__data['settings']['application'].get('rss_access_token'):
            secret = secrets.token_hex(16)
            self.__data['settings']['application']['rss_access_token'] = secret
-            self.mark_settings_dirty()
+            self.commit()

        # Ensure API access token exists
        if not self.__data['settings']['application'].get('api_access_token'):
            secret = secrets.token_hex(16)
            self.__data['settings']['application']['api_access_token'] = secret
-            self.mark_settings_dirty()
+            self.commit()

        # Handle password reset lockfile
        password_reset_lockfile = os.path.join(self.datastore_path, "removepassword.lock")
@@ -306,9 +309,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            self.remove_password()
            unlink(password_reset_lockfile)

-        # Start the background save thread
-        self.start_save_thread()
-
    def rehydrate_entity(self, uuid, entity, processor_override=None):
        """Set the dict back to the dict Watch object"""
        entity['uuid'] = uuid
@@ -341,13 +341,22 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        """
        Build settings data structure for saving.

+        Tags behavior depends on schema version:
+        - Before update_28 (schema < 28): Tags saved in settings for migration
+        - After update_28 (schema >= 28): Tags excluded from settings (in individual files)
+
        Returns:
            dict: Settings data ready for serialization
        """
+        import copy
+
+        # Deep copy settings to avoid modifying the original
+        settings_copy = copy.deepcopy(self.__data['settings'])
+
        return {
-            'note': 'Settings file - watches are stored in individual {uuid}/watch.json files',
-            'app_guid': self.__data['app_guid'],
-            'settings': self.__data['settings'],
+            'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
+            'app_guid': self.__data.get('app_guid'),
+            'settings': settings_copy,
            'build_sha': self.__data.get('build_sha'),
            'version_tag': self.__data.get('version_tag')
        }
@@ -365,7 +374,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        """
        settings_data = self._build_settings_data()
        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
-        save_json_atomic(changedetection_json, settings_data, label="settings", max_size_mb=10)
+        save_json_atomic(changedetection_json, settings_data, label="settings")

    def _load_watches(self):
        """
@@ -375,22 +384,45 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        Implementation of abstract method from FileSavingDataStore.
        Delegates to helper function and stores results in internal data structure.
        """
-        watching, watch_hashes = load_all_watches(
-            self.datastore_path,
-            self.rehydrate_entity,
-            self._compute_hash
-        )

        # Store loaded data
-        self.__data['watching'] = watching
-        self._watch_hashes = watch_hashes
+        # @note this will also work for the old legacy format because self.__data['watching'] should already have them loaded by this point.
+        self.__data['watching'].update(load_all_watches(
+            self.datastore_path,
+            self.rehydrate_entity
+        ))
+        logger.debug(f"Loaded {len(self.__data['watching'])} watches")

-        # Verify all watches have hashes
-        missing_hashes = [uuid for uuid in watching.keys() if uuid not in watch_hashes]
-        if missing_hashes:
-            logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after load: {missing_hashes[:5]}")
-        else:
-            logger.debug(f"All {len(watching)} watches have valid hashes")
+    def _load_tags(self):
+        """
+        Populate the tag registry from the individual tag.json files.
+
+        Every tag read from storage is wrapped as a Tag object pinned to the
+        restock_diff processor, then merged over the tags already held in
+        settings, so tag.json files take precedence (migration path).
+        """
+        from ..model import Tag
+
+        store_path = self.datastore_path
+        shared_data = self.__data
+
+        def build_tag(uuid, entity_dict):
+            """Stamp the uuid and the forced processor onto the dict, then wrap it as a Tag."""
+            entity_dict['uuid'] = uuid
+            # restock_diff is forced so the override functionality keeps working
+            entity_dict['processor'] = 'restock_diff'
+            return Tag.model(datastore_path=store_path, __datastore=shared_data, default=entity_dict)
+
+        loaded = load_all_tags(self.datastore_path, build_tag)
+        if not loaded:
+            # Nothing came back from storage - leave the settings tags untouched
+            return
+
+        # Entries from tag.json files replace same-key entries from settings
+        self.__data['settings']['application']['tags'].update(loaded)
+        logger.info(f"Loaded {len(loaded)} tags from individual tag.json files")

    def _delete_watch(self, uuid):
        """
@@ -414,7 +446,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
    def set_last_viewed(self, uuid, timestamp):
        logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}")
        self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
-        self.mark_watch_dirty(uuid)
+        self.data['watching'][uuid].commit()

        watch_check_update = signal('watch_check_update')
        if watch_check_update:
@@ -422,7 +454,22 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

    def remove_password(self):
+        """Clear the application password (stored as False) and save the settings straight away."""
        self.__data['settings']['application']['password'] = False
-        self.mark_settings_dirty()
+        self.commit()
+
+    def commit(self):
+        """
+        Persist the current settings to disk straight away.
+
+        Delegates to _save_settings(), which writes changedetection.json via
+        save_json_atomic().
+
+        Best-effort: any exception is logged and swallowed rather than re-raised.
+        Nothing in memory is altered here, so a later commit() writes the same
+        settings again.
+        """
+        try:
+            self._save_settings()
+            logger.debug("Committed settings")
+        except Exception as save_error:
+            logger.error(f"Failed to commit settings: {save_error}")

    def update_watch(self, uuid, update_obj):

@@ -441,7 +488,8 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

            self.__data['watching'][uuid].update(update_obj)

-        self.mark_watch_dirty(uuid)
+        # Immediate save
+        self.__data['watching'][uuid].commit()

    @property
    def threshold_seconds(self):
@@ -502,10 +550,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
                    except Exception as e:
                        logger.error(f"Failed to delete watch {watch_uuid} from storage: {e}")

-                    # Clean up tracking data
-                    self._watch_hashes.pop(watch_uuid, None)
-                    self._dirty_watches.discard(watch_uuid)
-
                    # Send delete signal
                    watch_delete_signal = signal('watch_deleted')
                    if watch_delete_signal:
@@ -527,17 +571,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
                # Remove from watching dict
                del self.data['watching'][uuid]

-                # Clean up tracking data
-                self._watch_hashes.pop(uuid, None)
-                self._dirty_watches.discard(uuid)
-
                # Send delete signal
                watch_delete_signal = signal('watch_deleted')
                if watch_delete_signal:
                    watch_delete_signal.send(watch_uuid=uuid)

-        self.needs_write_urgent = True
-
    # Clone a watch by UUID
    def clone(self, uuid):
        url = self.data['watching'][uuid].get('url')
@@ -562,7 +600,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
    # Remove a watch's data but keep the entry (URL etc)
    def clear_watch_history(self, uuid):
        self.__data['watching'][uuid].clear_watch()
-        self.needs_write_urgent = True
+        # Save the cleared watch immediately (replaces the old deferred-write flag)
+        self.__data['watching'][uuid].commit()

    def add_watch(self, url, tag='', extras=None, tag_uuids=None, save_immediately=True):

@@ -675,16 +713,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        self.__data['watching'][new_uuid] = new_watch

        if save_immediately:
-            # Save immediately using polymorphic method
-            try:
-                self.save_watch(new_uuid, force=True)
-                logger.debug(f"Saved new watch {new_uuid}")
-            except Exception as e:
-                logger.error(f"Failed to save new watch {new_uuid}: {e}")
-                # Mark dirty for retry
-                self.mark_watch_dirty(new_uuid)
-        else:
-            self.mark_watch_dirty(new_uuid)
+            # Save immediately using commit
+            new_watch.commit()
+            logger.debug(f"Saved new watch {new_uuid}")

        logger.debug(f"Added '{url}'")

@@ -719,25 +750,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

    # Old sync_to_json and save_datastore methods removed - now handled by FileSavingDataStore parent class

-    # Go through the datastore path and remove any snapshots that are not mentioned in the index
-    # This usually is not used, but can be handy.
-    def remove_unused_snapshots(self):
-        logger.info("Removing snapshots from datastore that are not in the index..")
-
-        index = []
-        for uuid in self.data['watching']:
-            for id in self.data['watching'][uuid].history:
-                index.append(self.data['watching'][uuid].history[str(id)])
-
-        import pathlib
-
-        # Only in the sub-directories
-        for uuid in self.data['watching']:
-            for item in pathlib.Path(self.datastore_path).rglob(uuid + "/*.txt"):
-                if not str(item) in index:
-                    logger.info(f"Removing {item}")
-                    unlink(item)
-
    @property
    def proxy_list(self):
        proxy_list = {}
@@ -829,7 +841,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        if watch:

            # In /datastore/xyz-xyz/headers.txt
-            filepath = os.path.join(watch.watch_data_dir, 'headers.txt')
+            filepath = os.path.join(watch.data_dir, 'headers.txt')
            try:
                if os.path.isfile(filepath):
                    headers.update(parse_headers_from_text_file(filepath))
@@ -889,7 +901,8 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

            self.__data['settings']['application']['tags'][new_uuid] = new_tag

-        self.mark_settings_dirty()
+        # Save tag to its own tag.json file instead of settings
+        new_tag.commit()
        return new_uuid

    def get_all_tags_for_watch(self, uuid):
@@ -1006,7 +1019,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            notification_urls.append(notification_url)
            self.__data['settings']['application']['notification_urls'] = notification_urls

-        self.mark_settings_dirty()
+        self.commit()
        return notification_url

    # Schema update methods moved to store/updates.py (DatastoreUpdatesMixin)
@@ -81,20 +81,3 @@ class DataStore(ABC):
        """
        pass

-    @abstractmethod
-    def force_save_all(self):
-        """
-        Force immediate synchronous save of all data to storage.
-
-        This is the abstract method for forcing a complete save.
-        Different backends implement this differently:
-        - File backend: Mark all watches/settings dirty, then save
-        - Redis backend: SAVE command or pipeline flush
-        - SQL backend: COMMIT transaction
-
-        Used by:
-        - Backup creation (ensure everything is saved before backup)
-        - Shutdown (ensure all changes are persisted)
-        - Manual save operations
-        """
-        pass
@@ -1,22 +1,17 @@
 """
-File-based datastore with individual watch persistence and dirty tracking.
+File-based datastore with individual watch persistence and immediate commits.

 This module provides the FileSavingDataStore abstract class that implements:
 - Individual watch.json file persistence
- Hash-based change detection (only save what changed)
- Periodic audit scan (catches unmarked changes)
- Background save thread with batched parallel saves
+- Immediate commit-based persistence (watch.commit(), datastore.commit())
 - Atomic file writes safe for NFS/NAS
 """

 import glob
-import hashlib
 import json
 import os
 import tempfile
 import time
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from threading import Thread
 from loguru import logger

 from .base import DataStore
@@ -34,19 +29,6 @@ except ImportError:
 # Set to True for mission-critical deployments requiring crash consistency
 FORCE_FSYNC_DATA_IS_CRITICAL = bool(strtobool(os.getenv('FORCE_FSYNC_DATA_IS_CRITICAL', 'False')))

-# Save interval configuration: How often the background thread saves dirty items
-# Default 10 seconds - increase for less frequent saves, decrease for more frequent
-DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS = int(os.getenv('DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS', '10'))
-
-# Rolling audit configuration: Scans a fraction of watches each cycle
-# Default: Run audit every 10s, split into 5 shards
-# Full audit completes every 50s (10s × 5 shards)
-# With 56k watches: 56k / 5 = ~11k watches per cycle (~60ms vs 316ms for all)
-# Handles dynamic watch count - recalculates shard boundaries each cycle
-DATASTORE_AUDIT_INTERVAL_SECONDS = int(os.getenv('DATASTORE_AUDIT_INTERVAL_SECONDS', '10'))
-DATASTORE_AUDIT_SHARDS = int(os.getenv('DATASTORE_AUDIT_SHARDS', '5'))
-
-
 # ============================================================================
 # Helper Functions for Atomic File Operations
 # ============================================================================
@@ -61,6 +43,9 @@ def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
    - Size validation
    - Proper error handling

+    Thread safety: Caller must hold datastore.lock to prevent concurrent modifications.
+    Multi-process safety: Not supported - run only one app instance per datastore.
+
    Args:
        file_path: Full path to target JSON file
        data_dict: Dictionary to serialize
@@ -190,23 +175,37 @@ def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
        raise e


+def save_entity_atomic(entity_dir, uuid, entity_dict, filename, entity_type, max_size_mb):
+    """
+    Save an entity (watch/tag) to {entity_dir}/{filename} by delegating to save_json_atomic().
+
+    Thin generic wrapper usable for any watch_base subclass (Watch, Tag, etc.).
+
+    Args:
+        entity_dir: Directory for this entity (e.g., /datastore/{uuid})
+        uuid: Entity UUID (only used to build the log label, not the path)
+        entity_dict: Dictionary representation of the entity
+        filename: JSON filename joined onto entity_dir (e.g., 'watch.json', 'tag.json')
+        entity_type: Type label for logging (e.g., 'watch', 'tag')
+        max_size_mb: Maximum allowed file size in MB (forwarded unchanged to save_json_atomic)
+
+    Raises:
+        ValueError: If serialized data exceeds max_size_mb
+        OSError: If disk is full (ENOSPC) or other I/O error
+    """
+    entity_json = os.path.join(entity_dir, filename)
+    save_json_atomic(entity_json, entity_dict, label=f"{entity_type} {uuid}", max_size_mb=max_size_mb)
+
+
 def save_watch_atomic(watch_dir, uuid, watch_dict):
    """
    Save a watch to disk using atomic write pattern.

-    Convenience wrapper around save_json_atomic for watches.
-
-    Args:
-        watch_dir: Directory for this watch (e.g., /datastore/{uuid})
-        uuid: Watch UUID (for logging)
-        watch_dict: Dictionary representation of the watch
-
-    Raises:
-        ValueError: If serialized data exceeds 10MB (indicates bug or corruption)
-        OSError: If disk is full (ENOSPC) or other I/O error
+    Convenience wrapper around save_entity_atomic for watches.
+    Kept for backwards compatibility.
    """
-    watch_json = os.path.join(watch_dir, "watch.json")
-    save_json_atomic(watch_json, watch_dict, label=f"watch {uuid}", max_size_mb=10)
+    save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)
+


 def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
@@ -219,8 +218,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
        rehydrate_entity_func: Function to convert dict to Watch object

    Returns:
-        Tuple of (Watch object, raw_data_dict) or (None, None) if failed
-        The raw_data_dict is needed to compute the hash before rehydration
+        Watch object or None if failed
    """
    try:
        # Check file size before reading
@@ -233,7 +231,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
                f"File: {watch_json}. This indicates a bug or data corruption. "
                f"Watch will be skipped."
            )
-            return None, None
+            return None

        if HAS_ORJSON:
            with open(watch_json, 'rb') as f:
@@ -242,15 +240,9 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
            with open(watch_json, 'r', encoding='utf-8') as f:
                watch_data = json.load(f)

-        if watch_data.get('time_schedule_limit'):
-            del watch_data['time_schedule_limit']
-        if watch_data.get('time_between_check'):
-            del watch_data['time_between_check']
-
-        # Return both the raw data and the rehydrated watch
-        # Raw data is needed to compute hash before rehydration changes anything
+        # Rehydrate and return watch object
        watch_obj = rehydrate_entity_func(uuid, watch_data)
-        return watch_obj, watch_data
+        return watch_obj

    except json.JSONDecodeError as e:
        logger.critical(
@@ -258,7 +250,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
            f"File: {watch_json}. Error: {e}. "
            f"Watch will be skipped and may need manual recovery from backup."
        )
-        return None, None
+        return None
    except ValueError as e:
        # orjson raises ValueError for invalid JSON
        if "invalid json" in str(e).lower() or HAS_ORJSON:
@@ -267,18 +259,18 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
                f"File: {watch_json}. Error: {e}. "
                f"Watch will be skipped and may need manual recovery from backup."
            )
-            return None, None
+            return None
        # Re-raise if it's not a JSON parsing error
        raise
    except FileNotFoundError:
        logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
-        return None, None
+        return None
    except Exception as e:
        logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
-        return None, None
+        return None


-def load_all_watches(datastore_path, rehydrate_entity_func, compute_hash_func):
+def load_all_watches(datastore_path, rehydrate_entity_func):
    """
    Load all watches from individual watch.json files.

@@ -289,21 +281,17 @@ def load_all_watches(datastore_path, rehydrate_entity_func, compute_hash_func):
    Args:
        datastore_path: Path to the datastore directory
        rehydrate_entity_func: Function to convert dict to Watch object
-        compute_hash_func: Function to compute hash from raw watch dict

    Returns:
-        Tuple of (watching_dict, hashes_dict)
-        - watching_dict: uuid -> Watch object
-        - hashes_dict: uuid -> hash string (computed from raw data)
+        Dictionary of uuid -> Watch object
    """
    start_time = time.time()
    logger.info("Loading watches from individual watch.json files...")

    watching = {}
-    watch_hashes = {}

    if not os.path.exists(datastore_path):
-        return watching, watch_hashes
+        return watching

    # Find all watch.json files using glob (faster than manual directory traversal)
    glob_start = time.time()
@@ -319,12 +307,9 @@ def load_all_watches(datastore_path, rehydrate_entity_func, compute_hash_func):
    for watch_json in watch_files:
        # Extract UUID from path: /datastore/{uuid}/watch.json
        uuid_dir = os.path.basename(os.path.dirname(watch_json))
-        watch, raw_data = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
-        if watch and raw_data:
+        watch = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
+        if watch:
            watching[uuid_dir] = watch
-            # Compute hash from rehydrated Watch object (as dict) to match how we compute on save
-            # This ensures hash matches what audit will compute from dict(watch)
-            watch_hashes[uuid_dir] = compute_hash_func(dict(watch))
            loaded += 1

            if loaded % 100 == 0:
@@ -344,7 +329,123 @@ def load_all_watches(datastore_path, rehydrate_entity_func, compute_hash_func):
    else:
        logger.info(f"Loaded {loaded} watches from disk in {elapsed:.2f}s ({loaded/elapsed:.0f} watches/sec)")

-    return watching, watch_hashes
+    return watching
+
+
+def load_tag_from_file(tag_json, uuid, rehydrate_entity_func):
+    """
+    Load a single tag from its JSON file, logging and returning None on failure.
+
+    Args:
+        tag_json: Path to the tag.json file
+        uuid: Tag UUID (used in log messages and passed to rehydrate_entity_func)
+        rehydrate_entity_func: Function to convert dict to Tag object, called as func(uuid, tag_data)
+
+    Returns:
+        Tag object, or None if the file is missing, larger than 1MB, not valid JSON, or otherwise fails to load
+    """
+    try:
+        # Check file size before reading; an oversized file is reported as corruption and skipped
+        file_size = os.path.getsize(tag_json)
+        MAX_TAG_SIZE = 1 * 1024 * 1024  # 1MB
+        if file_size > MAX_TAG_SIZE:
+            logger.critical(
+                f"CORRUPTED TAG DATA: Tag {uuid} file is unexpectedly large: "
+                f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_TAG_SIZE / 1024 / 1024}MB). "
+                f"File: {tag_json}. This indicates a bug or data corruption. "
+                f"Tag will be skipped."
+            )
+            return None
+
+        if HAS_ORJSON:
+            with open(tag_json, 'rb') as f:
+                tag_data = orjson.loads(f.read())
+        else:
+            with open(tag_json, 'r', encoding='utf-8') as f:
+                tag_data = json.load(f)
+
+        tag_data['processor'] = 'restock_diff'
+        # ^ forces 'processor' on every loaded tag, replacing any stored value (NOTE(review): confirm intent)
+        # Rehydrate tag (convert dict to Tag object); processor_override is set inside the rehydration function
+        tag_obj = rehydrate_entity_func(uuid, tag_data)
+        return tag_obj
+
+    except json.JSONDecodeError as e:
+        logger.critical(
+            f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
+            f"File: {tag_json}. Error: {e}. "
+            f"Tag will be skipped and may need manual recovery from backup."
+        )
+        return None
+    except ValueError as e:
+        # orjson raises ValueError for invalid JSON; when HAS_ORJSON is true any ValueError is handled as corrupt JSON
+        if "invalid json" in str(e).lower() or HAS_ORJSON:
+            logger.critical(
+                f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
+                f"File: {tag_json}. Error: {e}. "
+                f"Tag will be skipped and may need manual recovery from backup."
+            )
+            return None
+        # Re-raise if it's not a JSON parsing error (only reachable when HAS_ORJSON is false)
+        raise
+    except FileNotFoundError:
+        logger.debug(f"Tag file not found: {tag_json} for tag {uuid}")
+        return None
+    except Exception as e:
+        logger.error(f"Failed to load tag {uuid} from {tag_json}: {e}")
+        return None
+
+
+def load_all_tags(datastore_path, rehydrate_entity_func):
+    """
+    Load all tags from individual tag.json files found directly under datastore_path.
+
+    Tags are stored separately from settings in {uuid}/tag.json files.
+
+    Args:
+        datastore_path: Path to the datastore directory
+        rehydrate_entity_func: Function to convert dict to Tag object (passed through to load_tag_from_file)
+
+    Returns:
+        Dictionary of uuid -> Tag object (empty if the path is missing or has no tag.json; failed tags are omitted)
+    """
+    logger.info("Loading tags from individual tag.json files...")
+
+    tags = {}
+
+    if not os.path.exists(datastore_path):
+        return tags
+
+    # Find all tag.json files using glob (only exactly one directory level below datastore_path)
+    tag_files = glob.glob(os.path.join(datastore_path, "*", "tag.json"))
+
+    total = len(tag_files)
+    if total == 0:
+        logger.debug("No tag.json files found")
+        return tags
+
+    logger.debug(f"Found {total} tag.json files")
+
+    loaded = 0
+    failed = 0
+
+    for tag_json in tag_files:
+        # Extract UUID from path: /datastore/{uuid}/tag.json (the parent directory name is the dict key)
+        uuid_dir = os.path.basename(os.path.dirname(tag_json))
+        tag = load_tag_from_file(tag_json, uuid_dir, rehydrate_entity_func)
+        if tag:
+            tags[uuid_dir] = tag
+            loaded += 1
+        else:
+            # load_tag_from_file already logged the specific error; only count it here
+            failed += 1
+
+    if failed > 0:
+        logger.warning(f"Loaded {loaded} tags, {failed} tags FAILED to load")
+    else:
+        logger.info(f"Loaded {loaded} tags from disk")
+
+    return tags


 # ============================================================================
@@ -353,151 +454,20 @@ def load_all_watches(datastore_path, rehydrate_entity_func, compute_hash_func):

 class FileSavingDataStore(DataStore):
    """
-    Abstract datastore that provides file persistence with change tracking.
+    Abstract datastore that provides file persistence with immediate commits.

    Features:
    - Individual watch.json files (one per watch)
-    - Dirty tracking: Only saves items that have changed
-    - Hash-based change detection: Prevents unnecessary writes
-    - Background save thread: Non-blocking persistence
-    - Two-tier urgency: Standard (60s) and urgent (immediate) saves
+    - Immediate persistence via watch.commit() and datastore.commit()
+    - Atomic file writes for crash safety

    Subclasses must implement:
    - rehydrate_entity(): Convert dict to Watch object
    - Access to internal __data structure for watch management
    """

-    needs_write = False
-    needs_write_urgent = False
-    stop_thread = False
-
-    # Change tracking
-    _dirty_watches = set()      # Watch UUIDs that need saving
-    _dirty_settings = False     # Settings changed
-    _watch_hashes = {}          # UUID -> SHA256 hash for change detection
-
-    # Health monitoring
-    _last_save_time = 0         # Timestamp of last successful save
-    _last_audit_time = 0        # Timestamp of last audit scan
-    _save_cycle_count = 0       # Number of save cycles completed
-    _total_saves = 0            # Total watches saved (lifetime)
-    _save_errors = 0            # Total save errors (lifetime)
-    _audit_count = 0            # Number of audit scans completed
-    _audit_found_changes = 0    # Total unmarked changes found by audits
-    _audit_shard_index = 0      # Current shard being audited (rolling audit)
-
    def __init__(self):
        super().__init__()
-        self.save_data_thread = None
-        self._last_save_time = time.time()
-        self._last_audit_time = time.time()
-
-
-    def mark_watch_dirty(self, uuid):
-        """
-        Mark a watch as needing save.
-
-        Args:
-            uuid: Watch UUID
-        """
-        with self.lock:
-            self._dirty_watches.add(uuid)
-            dirty_count = len(self._dirty_watches)
-
-        # Backpressure detection - warn if dirty set grows too large
-        if dirty_count > 1000:
-            logger.critical(
-                f"BACKPRESSURE WARNING: {dirty_count} watches pending save! "
-                f"Save thread may not be keeping up with write rate. "
-                f"This could indicate disk I/O bottleneck or save thread failure."
-            )
-        elif dirty_count > 500:
-            logger.warning(
-                f"Dirty watch count high: {dirty_count} watches pending save. "
-                f"Monitoring for potential backpressure."
-            )
-
-        self.needs_write = True
-
-    def mark_settings_dirty(self):
-        """Mark settings as needing save."""
-        with self.lock:
-            self._dirty_settings = True
-        self.needs_write = True
-
-    def _compute_hash(self, watch_dict):
-        """
-        Compute SHA256 hash of watch for change detection.
-
-        Args:
-            watch_dict: Dictionary representation of watch
-
-        Returns:
-            Hex string of SHA256 hash
-        """
-        # Use orjson for deterministic serialization if available
-        if HAS_ORJSON:
-            json_bytes = orjson.dumps(watch_dict, option=orjson.OPT_SORT_KEYS)
-        else:
-            json_str = json.dumps(watch_dict, sort_keys=True, ensure_ascii=False)
-            json_bytes = json_str.encode('utf-8')
-
-        return hashlib.sha256(json_bytes).hexdigest()
-
-    def save_watch(self, uuid, force=False, watch_dict=None, current_hash=None):
-        """
-        Save a single watch if it has changed (polymorphic method).
-
-        Args:
-            uuid: Watch UUID
-            force: If True, skip hash check and save anyway
-            watch_dict: Pre-computed watch dictionary (optimization)
-            current_hash: Pre-computed hash (optimization)
-
-        Returns:
-            True if saved, False if skipped (unchanged)
-        """
-        if not self._watch_exists(uuid):
-            logger.warning(f"Cannot save watch {uuid} - does not exist")
-            return False
-
-        # Get watch dict if not provided
-        if watch_dict is None:
-            watch_dict = self._get_watch_dict(uuid)
-
-        # Compute hash if not provided
-        if current_hash is None:
-            current_hash = self._compute_hash(watch_dict)
-
-        # Skip save if unchanged (unless forced)
-        if not force and current_hash == self._watch_hashes.get(uuid):
-            return False
-
-        try:
-            self._save_watch(uuid, watch_dict)
-            self._watch_hashes[uuid] = current_hash
-            logger.debug(f"Saved watch {uuid}")
-            return True
-        except Exception as e:
-            logger.error(f"Failed to save watch {uuid}: {e}")
-            raise
-
-    def _save_watch(self, uuid, watch_dict):
-        """
-        Save a single watch to storage (polymorphic).
-
-        Backend-specific implementation. Subclasses override for different storage:
-        - File backend: Writes to {uuid}/watch.json
-        - Redis backend: SET watch:{uuid}
-        - SQL backend: UPDATE watches WHERE uuid=?
-
-        Args:
-            uuid: Watch UUID
-            watch_dict: Dictionary representation of watch
-        """
-        # Default file implementation
-        watch_dir = os.path.join(self.datastore_path, uuid)
-        save_watch_atomic(watch_dir, uuid, watch_dict)

    def _save_settings(self):
        """
@@ -510,6 +480,7 @@ class FileSavingDataStore(DataStore):
        """
        raise NotImplementedError("Subclass must implement _save_settings")

+
    def _load_watches(self):
        """
        Load all watches from storage (polymorphic).
@@ -535,364 +506,4 @@ class FileSavingDataStore(DataStore):
        """
        raise NotImplementedError("Subclass must implement _delete_watch")

-    def _save_dirty_items(self):
-        """
-        Save dirty watches and settings.

-        This is the core optimization: instead of saving the entire datastore,
-        we only save watches that were marked dirty and settings if changed.
-        """
-        start_time = time.time()
-
-        # Capture dirty sets under lock
-        with self.lock:
-            dirty_watches = list(self._dirty_watches)
-            dirty_settings = self._dirty_settings
-            self._dirty_watches.clear()
-            self._dirty_settings = False
-
-        if not dirty_watches and not dirty_settings:
-            return
-
-        logger.trace(f"Saving {len(dirty_watches)} dirty watches, settings_dirty={dirty_settings}")
-
-        # Save each dirty watch using the polymorphic save method
-        saved_count = 0
-        error_count = 0
-        skipped_unchanged = 0
-
-        # Process in batches of 50, using thread pool for parallel saves
-        BATCH_SIZE = 50
-        MAX_WORKERS = 20  # Number of parallel save threads
-
-        def save_single_watch(uuid):
-            """Helper function for thread pool execution."""
-            try:
-                # Check if watch still exists (might have been deleted)
-                if not self._watch_exists(uuid):
-                    # Watch was deleted, remove hash
-                    self._watch_hashes.pop(uuid, None)
-                    return {'status': 'deleted', 'uuid': uuid}
-
-                # Pre-check hash to avoid unnecessary save_watch() calls
-                watch_dict = self._get_watch_dict(uuid)
-                current_hash = self._compute_hash(watch_dict)
-
-                if current_hash == self._watch_hashes.get(uuid):
-                    # Watch hasn't actually changed, skip
-                    return {'status': 'unchanged', 'uuid': uuid}
-
-                # Pass pre-computed values to avoid redundant serialization/hashing
-                if self.save_watch(uuid, force=True, watch_dict=watch_dict, current_hash=current_hash):
-                    return {'status': 'saved', 'uuid': uuid}
-                else:
-                    return {'status': 'skipped', 'uuid': uuid}
-            except Exception as e:
-                logger.error(f"Error saving watch {uuid}: {e}")
-                return {'status': 'error', 'uuid': uuid, 'error': e}
-
-        # Process dirty watches in batches
-        for batch_start in range(0, len(dirty_watches), BATCH_SIZE):
-            batch = dirty_watches[batch_start:batch_start + BATCH_SIZE]
-            batch_num = (batch_start // BATCH_SIZE) + 1
-            total_batches = (len(dirty_watches) + BATCH_SIZE - 1) // BATCH_SIZE
-
-            if len(dirty_watches) > BATCH_SIZE:
-                logger.trace(f"Save batch {batch_num}/{total_batches} ({len(batch)} watches)")
-
-            # Use thread pool to save watches in parallel
-            with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-                # Submit all save tasks
-                future_to_uuid = {executor.submit(save_single_watch, uuid): uuid for uuid in batch}
-
-                # Collect results as they complete
-                for future in as_completed(future_to_uuid):
-                    result = future.result()
-                    status = result['status']
-
-                    if status == 'saved':
-                        saved_count += 1
-                    elif status == 'unchanged':
-                        skipped_unchanged += 1
-                    elif status == 'error':
-                        error_count += 1
-                        # Re-mark for retry
-                        with self.lock:
-                            self._dirty_watches.add(result['uuid'])
-                    # 'deleted' and 'skipped' don't need special handling
-
-        # Save settings if changed
-        if dirty_settings:
-            try:
-                self._save_settings()
-                logger.debug("Saved settings")
-            except Exception as e:
-                logger.error(f"Failed to save settings: {e}")
-                error_count += 1
-                with self.lock:
-                    self._dirty_settings = True
-
-        # Update metrics
-        elapsed = time.time() - start_time
-        self._save_cycle_count += 1
-        self._total_saves += saved_count
-        self._save_errors += error_count
-        self._last_save_time = time.time()
-
-        # Log performance metrics
-        if saved_count > 0:
-            avg_time_per_watch = (elapsed / saved_count) * 1000  # milliseconds
-            skipped_msg = f", {skipped_unchanged} unchanged" if skipped_unchanged > 0 else ""
-            parallel_msg = f" [parallel: {MAX_WORKERS} workers]" if saved_count > 1 else ""
-            logger.info(
-                f"Successfully saved {saved_count} watches in {elapsed:.2f}s "
-                f"(avg {avg_time_per_watch:.1f}ms per watch{skipped_msg}){parallel_msg}. "
-                f"Total: {self._total_saves} saves, {self._save_errors} errors (lifetime)"
-            )
-        elif skipped_unchanged > 0:
-            logger.debug(f"Save cycle: {skipped_unchanged} watches verified unchanged (hash match), nothing saved")
-
-        if error_count > 0:
-            logger.error(f"Save cycle completed with {error_count} errors")
-
-        self.needs_write = False
-        self.needs_write_urgent = False
-
-    def _watch_exists(self, uuid):
-        """
-        Check if watch exists. Subclass must implement.
-
-        Args:
-            uuid: Watch UUID
-
-        Returns:
-            bool
-        """
-        raise NotImplementedError("Subclass must implement _watch_exists")
-
-    def _get_watch_dict(self, uuid):
-        """
-        Get watch as dictionary. Subclass must implement.
-
-        Args:
-            uuid: Watch UUID
-
-        Returns:
-            Dictionary representation of watch
-        """
-        raise NotImplementedError("Subclass must implement _get_watch_dict")
-
-    def _audit_all_watches(self):
-        """
-        Rolling audit: Scans a fraction of watches to detect unmarked changes.
-
-        Instead of scanning ALL watches at once, this scans 1/N shards per cycle.
-        The shard rotates each cycle, completing a full audit every N cycles.
-
-        Handles dynamic watch count - recalculates shard boundaries each cycle,
-        so newly added watches will be audited in subsequent cycles.
-
-        Benefits:
-        - Lower CPU per cycle (56k / 5 = ~11k watches vs all 56k)
-        - More frequent audits overall (every 50s vs every 10s)
-        - Spreads load evenly across time
-        """
-        audit_start = time.time()
-
-        # Get list of all watch UUIDs (read-only, no lock needed)
-        try:
-            all_uuids = list(self.data['watching'].keys())
-        except (KeyError, AttributeError, RuntimeError):
-            # Data structure not ready or being modified
-            return
-
-        if not all_uuids:
-            return
-
-        total_watches = len(all_uuids)
-
-        # Calculate this cycle's shard boundaries
-        # Example: 56,278 watches / 5 shards = 11,255 watches per shard
-        # Shard 0: [0:11255], Shard 1: [11255:22510], etc.
-        shard_size = (total_watches + DATASTORE_AUDIT_SHARDS - 1) // DATASTORE_AUDIT_SHARDS
-        start_idx = self._audit_shard_index * shard_size
-        end_idx = min(start_idx + shard_size, total_watches)
-
-        # Handle wrap-around (shouldn't happen normally, but defensive)
-        if start_idx >= total_watches:
-            self._audit_shard_index = 0
-            start_idx = 0
-            end_idx = min(shard_size, total_watches)
-
-        # Audit only this shard's watches
-        shard_uuids = all_uuids[start_idx:end_idx]
-
-        changes_found = 0
-        errors = 0
-
-        for uuid in shard_uuids:
-            try:
-                # Get current watch dict and compute hash
-                watch_dict = self._get_watch_dict(uuid)
-                current_hash = self._compute_hash(watch_dict)
-                stored_hash = self._watch_hashes.get(uuid)
-
-                # If hash changed and not already marked dirty, mark it
-                if current_hash != stored_hash:
-                    with self.lock:
-                        if uuid not in self._dirty_watches:
-                            self._dirty_watches.add(uuid)
-                            changes_found += 1
-                            logger.warning(
-                                f"Audit detected unmarked change in watch {uuid[:8]}... current {current_hash:8} stored hash {stored_hash[:8]}"
-                                f"(hash changed but not marked dirty)"
-                            )
-                            self.needs_write = True
-            except Exception as e:
-                errors += 1
-                logger.trace(f"Audit error for watch {uuid[:8]}...: {e}")
-
-        audit_elapsed = (time.time() - audit_start) * 1000  # milliseconds
-
-        # Advance to next shard (wrap around after last shard)
-        self._audit_shard_index = (self._audit_shard_index + 1) % DATASTORE_AUDIT_SHARDS
-
-        # Update metrics
-        self._audit_count += 1
-        self._audit_found_changes += changes_found
-        self._last_audit_time = time.time()
-
-        if changes_found > 0:
-            logger.warning(
-                f"Audit shard {self._audit_shard_index}/{DATASTORE_AUDIT_SHARDS} found {changes_found} "
-                f"unmarked changes in {len(shard_uuids)}/{total_watches} watches ({audit_elapsed:.1f}ms)"
-            )
-        else:
-            logger.trace(
-                f"Audit shard {self._audit_shard_index}/{DATASTORE_AUDIT_SHARDS}: "
-                f"{len(shard_uuids)}/{total_watches} watches checked, 0 changes ({audit_elapsed:.1f}ms)"
-            )
-
-    def save_datastore(self):
-        """
-        Background thread that periodically saves dirty items and audits watches.
-
-        Runs two independent cycles:
-        1. Save dirty items every DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS (default 10s)
-        2. Rolling audit: every DATASTORE_AUDIT_INTERVAL_SECONDS (default 10s)
-           - Scans 1/DATASTORE_AUDIT_SHARDS watches per cycle (default 1/5)
-           - Full audit completes every 50s (10s × 5 shards)
-           - Automatically handles new/deleted watches
-
-        Uses 0.5s sleep intervals for responsiveness to urgent saves.
-        """
-        while True:
-            if self.stop_thread:
-                # Graceful shutdown: flush any remaining dirty items before stopping
-                if self.needs_write or self._dirty_watches or self._dirty_settings:
-                    logger.warning("Datastore save thread stopping - flushing remaining dirty items...")
-                    try:
-                        self._save_dirty_items()
-                        logger.info("Graceful shutdown complete - all data saved")
-                    except Exception as e:
-                        logger.critical(f"FAILED to save dirty items during shutdown: {e}")
-                else:
-                    logger.info("Datastore save thread stopping - no dirty items")
-                return
-
-            # Check if it's time to run audit scan (every N seconds)
-            if time.time() - self._last_audit_time >= DATASTORE_AUDIT_INTERVAL_SECONDS:
-                try:
-                    self._audit_all_watches()
-                except Exception as e:
-                    logger.error(f"Error in audit cycle: {e}")
-
-            # Save dirty items if needed
-            if self.needs_write or self.needs_write_urgent:
-                try:
-                    self._save_dirty_items()
-                except Exception as e:
-                    logger.error(f"Error in save cycle: {e}")
-
-            # Timer with early break for urgent saves
-            # Each iteration is 0.5 seconds, so iterations = DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS * 2
-            for i in range(DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS * 2):
-                time.sleep(0.5)
-                if self.stop_thread or self.needs_write_urgent:
-                    break
-
-    def start_save_thread(self):
-        """Start the background save thread."""
-        if not self.save_data_thread or not self.save_data_thread.is_alive():
-            self.save_data_thread = Thread(target=self.save_datastore, daemon=True, name="DatastoreSaver")
-            self.save_data_thread.start()
-            logger.info("Datastore save thread started")
-
-    def force_save_all(self):
-        """
-        Force immediate synchronous save of all changes to storage.
-
-        File backend implementation of the abstract force_save_all() method.
-        Marks all watches and settings as dirty, then saves immediately.
-
-        Used by:
-        - Backup creation (ensure everything is saved before backup)
-        - Shutdown (ensure all changes are persisted)
-        - Manual save operations
-        """
-        logger.info("Force saving all data to storage...")
-
-        # Mark everything as dirty to ensure complete save
-        for uuid in self.data['watching'].keys():
-            self.mark_watch_dirty(uuid)
-        self.mark_settings_dirty()
-
-        # Save immediately (synchronous)
-        self._save_dirty_items()
-
-        logger.success("All data saved to storage")
-
-    def get_health_status(self):
-        """
-        Get datastore health status for monitoring.
-
-        Returns:
-            dict with health metrics and status
-        """
-        now = time.time()
-        time_since_last_save = now - self._last_save_time
-
-        with self.lock:
-            dirty_count = len(self._dirty_watches)
-
-        is_thread_alive = self.save_data_thread and self.save_data_thread.is_alive()
-
-        # Determine health status
-        if not is_thread_alive:
-            status = "CRITICAL"
-            message = "Save thread is DEAD"
-        elif time_since_last_save > 300:  # 5 minutes
-            status = "WARNING"
-            message = f"No save activity for {time_since_last_save:.0f}s"
-        elif dirty_count > 1000:
-            status = "WARNING"
-            message = f"High backpressure: {dirty_count} watches pending"
-        elif self._save_errors > 0 and (self._save_errors / max(self._total_saves, 1)) > 0.01:
-            status = "WARNING"
-            message = f"High error rate: {self._save_errors} errors"
-        else:
-            status = "HEALTHY"
-            message = "Operating normally"
-
-        return {
-            "status": status,
-            "message": message,
-            "thread_alive": is_thread_alive,
-            "dirty_watches": dirty_count,
-            "dirty_settings": self._dirty_settings,
-            "last_save_seconds_ago": int(time_since_last_save),
-            "save_cycles": self._save_cycle_count,
-            "total_saves": self._total_saves,
-            "total_errors": self._save_errors,
-            "error_rate_percent": round((self._save_errors / max(self._total_saves, 1)) * 100, 2)
-        }
@@ -1,66 +0,0 @@
-"""
-Legacy format loader for url-watches.json.
-
-Provides functions to detect and load from the legacy monolithic JSON format.
-Used during migration (update_26) to transition to individual watch.json files.
-"""
-
-import os
-import json
-from loguru import logger
-
-# Try to import orjson for faster JSON serialization
-try:
-    import orjson
-    HAS_ORJSON = True
-except ImportError:
-    HAS_ORJSON = False
-
-
-def has_legacy_datastore(datastore_path):
-    """
-    Check if a legacy url-watches.json file exists.
-
-    This is used by update_26 to determine if migration is needed.
-
-    Args:
-        datastore_path: Path to datastore directory
-
-    Returns:
-        bool: True if url-watches.json exists
-    """
-    url_watches_json = os.path.join(datastore_path, "url-watches.json")
-    return os.path.exists(url_watches_json)
-
-
-def load_legacy_format(json_store_path):
-    """
-    Load datastore from legacy url-watches.json format.
-
-    Args:
-        json_store_path: Full path to url-watches.json file
-
-    Returns:
-        dict: Loaded datastore data with 'watching', 'settings', etc.
-        None: If file doesn't exist or loading failed
-    """
-    logger.info(f"Loading from legacy format: {json_store_path}")
-
-    if not os.path.isfile(json_store_path):
-        logger.warning(f"Legacy file not found: {json_store_path}")
-        return None
-
-    try:
-        if HAS_ORJSON:
-            with open(json_store_path, 'rb') as f:
-                data = orjson.loads(f.read())
-        else:
-            with open(json_store_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-
-        logger.info(f"Loaded {len(data.get('watching', {}))} watches from legacy format")
-        return data
-
-    except Exception as e:
-        logger.error(f"Failed to load legacy format: {e}")
-        return None
@@ -16,12 +16,18 @@ import time
 from loguru import logger
 from copy import deepcopy

+
+# Try to import orjson for faster JSON serialization
+try:
+    import orjson
+    HAS_ORJSON = True
+except ImportError:
+    HAS_ORJSON = False
+
 from ..html_tools import TRANSLATE_WHITESPACE_TABLE
 from ..processors.restock_diff import Restock
 from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
 from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
-from .file_saving_datastore import save_watch_atomic
-

 def create_backup_tarball(datastore_path, update_number):
    """
@@ -29,6 +35,7 @@ def create_backup_tarball(datastore_path, update_number):

    Includes:
    - All {uuid}/watch.json files
+    - All {uuid}/tag.json files
    - changedetection.json (settings, if it exists)
    - url-watches.json (legacy format, if it exists)
    - Directory structure preserved
@@ -44,7 +51,7 @@ def create_backup_tarball(datastore_path, update_number):
    To restore from a backup:
        cd /path/to/datastore
        tar -xzf before-update-N-timestamp.tar.gz
-    This will restore all watch.json files and settings to their pre-update state.
+    This will restore all watch.json and tag.json files and settings to their pre-update state.
    """
    timestamp = int(time.time())
    backup_filename = f"before-update-{update_number}-{timestamp}.tar.gz"
@@ -66,9 +73,10 @@ def create_backup_tarball(datastore_path, update_number):
                tar.add(url_watches_json, arcname="url-watches.json")
                logger.debug("Added url-watches.json to backup")

-            # Backup all watch directories with their watch.json files
+            # Backup all watch/tag directories with their JSON files
            # This preserves the UUID directory structure
            watch_count = 0
+            tag_count = 0
            for entry in os.listdir(datastore_path):
                entry_path = os.path.join(datastore_path, entry)

@@ -80,17 +88,22 @@ def create_backup_tarball(datastore_path, update_number):
                if entry.startswith('.') or entry.startswith('before-update-'):
                    continue

-                # Check if this directory has a watch.json (indicates it's a watch UUID directory)
+                # Backup watch.json if exists
                watch_json = os.path.join(entry_path, "watch.json")
                if os.path.isfile(watch_json):
-                    # Add the watch.json file preserving directory structure
                    tar.add(watch_json, arcname=f"{entry}/watch.json")
                    watch_count += 1

                    if watch_count % 100 == 0:
                        logger.debug(f"Backed up {watch_count} watch.json files...")

-            logger.success(f"Backup created: {backup_filename} ({watch_count} watches)")
+                # Backup tag.json if exists
+                tag_json = os.path.join(entry_path, "tag.json")
+                if os.path.isfile(tag_json):
+                    tar.add(tag_json, arcname=f"{entry}/tag.json")
+                    tag_count += 1
+
+            logger.success(f"Backup created: {backup_filename} ({watch_count} watches from disk, {tag_count} tags from disk)")
            return backup_path

    except Exception as e:
@@ -130,6 +143,7 @@ class DatastoreUpdatesMixin:
        return updates_available

    def run_updates(self, current_schema_version=None):
        """
        Run all pending schema updates sequentially.

@@ -147,12 +161,29 @@ class DatastoreUpdatesMixin:
        2. For each update > current schema version:
           - Create backup of datastore
           - Run update method
-           - Update schema version
-           - Mark settings and watches dirty
+           - Update schema version and commit settings
+           - Commit all watches and tags
        3. If any update fails, stop processing
-        4. Save all changes immediately
+        4. All changes saved via individual .commit() calls
        """
+        # Imported below the docstring: a statement placed before the string literal
+        # would stop it from being this function's docstring. Kept at function scope
+        # because the update loop's error path calls sys.exit(1).
+        import sys
        """
        updates_available = self.get_updates_available()
+        if self.data.get('watching'):
+            test_watch = self.data['watching'].get(next(iter(self.data.get('watching', {}))))
+            from ..model.Watch import model
+
+            if not isinstance(test_watch, model):
+                import sys
+                logger.critical("Cannot run updates! Watch structure must be re-hydrated back to a Watch model object!")
+                sys.exit(1)
+
+        if self.data['settings']['application'].get('tags',{}):
+            test_tag = self.data['settings']['application'].get('tags',{}).get(next(iter(self.data['settings']['application'].get('tags',{}))))
+            from ..model.Tag import model as tag_model
+
+            if not isinstance(test_tag, tag_model):
+                import sys
+                logger.critical("Cannot run updates! Watch tag/group structure must be re-hydrated back to a Tag model object!")
+                sys.exit(1)

        # Determine current schema version
        if current_schema_version is None:
@@ -168,7 +199,7 @@ class DatastoreUpdatesMixin:
                    latest_update = updates_available[-1] if updates_available else 0
                    logger.info(f"No schema version found and no watches exist - assuming fresh install, setting schema_version to {latest_update}")
                    self.data['settings']['application']['schema_version'] = latest_update
-                    self.mark_settings_dirty()
+                    self.commit()
                    return  # No updates needed for fresh install
                else:
                    # Has watches but no schema version - likely old datastore, run all updates
@@ -194,31 +225,15 @@ class DatastoreUpdatesMixin:
                try:
                    update_method = getattr(self, f"update_{update_n}")()
                except Exception as e:
-                    logger.error(f"Error while trying update_{update_n}")
-                    logger.error(e)
-                    # Don't run any more updates
-                    return
+                    logger.critical(f"Error while trying update_{update_n}")
+                    logger.exception(e)
+                    sys.exit(1)
                else:
-                    # Bump the version, important
+                    # Bump the version
                    self.data['settings']['application']['schema_version'] = update_n
-                    self.mark_settings_dirty()
+                    self.commit()

-                    # CRITICAL: Mark all watches as dirty so changes are persisted
-                    # Most updates modify watches, and in the new individual watch.json structure,
-                    # we need to ensure those changes are saved
-                    logger.info(f"Marking all {len(self.data['watching'])} watches as dirty after update_{update_n} (so that it saves them to disk)")
-                    for uuid in self.data['watching'].keys():
-                        self.mark_watch_dirty(uuid)
-
-                    # Save changes immediately after each update (more resilient than batching)
-                    logger.critical(f"Saving all changes after update_{update_n}")
-                    try:
-                        self._save_dirty_items()
-                        logger.success(f"Update {update_n} changes saved successfully")
-                    except Exception as e:
-                        logger.error(f"Failed to save update_{update_n} changes: {e}")
-                        # Don't raise - update already ran, but changes might not be persisted
-                        # The update will try to run again on next startup
+                    logger.success(f"Update {update_n} completed")

                    # Track which updates ran
                    updates_ran.append(update_n)
@@ -468,6 +483,14 @@ class DatastoreUpdatesMixin:
                del self.data['watching'][uuid]['extract_title_as_title']

        if self.data['settings']['application'].get('extract_title_as_title'):
+            # Ensure 'ui' key exists (defensive for edge cases where base_config merge didn't happen)
+            if 'ui' not in self.data['settings']['application']:
+                self.data['settings']['application']['ui'] = {
+                    'use_page_title_in_list': True,
+                    'open_diff_in_new_tab': True,
+                    'socket_io_enabled': True,
+                    'favicons_enabled': True
+                }
            self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')

    def update_21(self):
@@ -555,27 +578,6 @@ class DatastoreUpdatesMixin:
        logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
        logger.critical("=" * 80)

-        # Check if already migrated
-        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
-        if os.path.exists(changedetection_json):
-            logger.info("Migration already completed (changedetection.json exists), skipping")
-            return
-
-        # Check if we need to load legacy data
-        from .legacy_loader import has_legacy_datastore, load_legacy_format
-
-        if not has_legacy_datastore(self.datastore_path):
-            logger.info("No legacy datastore found, nothing to migrate")
-            return
-
-        # Load legacy data from url-watches.json
-        logger.critical("Loading legacy datastore from url-watches.json...")
-        legacy_path = os.path.join(self.datastore_path, "url-watches.json")
-        legacy_data = load_legacy_format(legacy_path)
-
-        if not legacy_data:
-            raise Exception("Failed to load legacy datastore from url-watches.json")
-
        # Populate settings from legacy data
        logger.info("Populating settings from legacy data...")
        watch_count = len(self.data['watching'])
@@ -587,9 +589,7 @@ class DatastoreUpdatesMixin:
        saved_count = 0
        for uuid, watch in self.data['watching'].items():
            try:
-                watch_dict = dict(watch)
-                watch_dir = os.path.join(self.datastore_path, uuid)
-                save_watch_atomic(watch_dir, uuid, watch_dict)
+                watch.commit()
                saved_count += 1

                if saved_count % 100 == 0:
@@ -635,36 +635,20 @@ class DatastoreUpdatesMixin:

        # Phase 4: Verify settings file exists
        logger.critical("Phase 4/4: Verifying changedetection.json exists...")
+        changedetection_json_new_schema=os.path.join(self.datastore_path, "changedetection.json")
+        if not os.path.isfile(changedetection_json_new_schema):
+            import sys
+            logger.critical("Migration failed, changedetection.json not found after update ran!")
+            sys.exit(1)

-        if not os.path.isfile(changedetection_json):
-            raise Exception(
-                "Migration failed: changedetection.json not found after save. "
-                "url-watches.json remains intact, safe to retry."
-            )

        logger.critical("Phase 4 complete: Verified changedetection.json exists")

        # Success! Now reload from new format
        logger.critical("Reloading datastore from new format...")
-        self._load_state() # Includes load_watches
+        # write it to disk, it will be saved without ['watching'] in the JSON db because we find it from disk glob
+        self._save_settings()
        logger.success("Datastore reloaded from new format successfully")
-
-
-        # Verify all watches have hashes after migration
-        missing_hashes = [uuid for uuid in self.data['watching'].keys() if uuid not in self._watch_hashes]
-        if missing_hashes:
-            logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after migration: {missing_hashes[:5]}")
-        else:
-            logger.success(f"All {len(self.data['watching'])} watches have valid hashes after migration")
-
-        # Set schema version to latest available update
-        # This prevents re-running updates and re-marking all watches as dirty
-        updates_available = self.get_updates_available()
-        latest_schema = updates_available[-1] if updates_available else 26
-        self.data['settings']['application']['schema_version'] = latest_schema
-        self.mark_settings_dirty()
-        logger.info(f"Set schema_version to {latest_schema} (migration complete, all watches already saved)")
-
        logger.critical("=" * 80)
        logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
        logger.critical("=" * 80)
@@ -683,4 +667,59 @@ class DatastoreUpdatesMixin:
        logger.info("")

    def update_26(self):
-        self.migrate_legacy_db_format()
+        self.migrate_legacy_db_format()
+
+    def update_28(self):
+        """
+        Migrate tags to individual tag.json files.
+
+        Tags are currently saved only in changedetection.json (settings).
+        This migration ALSO saves them to individual {uuid}/tag.json files,
+        similar to how watches are stored (dual storage).
+
+        Benefits:
+        - Allows atomic tag updates without rewriting entire settings
+        - Enables independent tag versioning/backup
+        - Maintains backwards compatibility (tags stay in settings too)
+
+        Behaviour:
+        - Returns immediately when settings contain no tags.
+        - Calls commit() on every tag object; a failure for one tag is logged
+          and counted but not re-raised, so the remaining tags are still tried
+          and this method returns normally.
+        """
+        # Persistence is delegated to each tag object's own commit(); presumably that
+        # is what writes {uuid}/tag.json - TODO confirm against the Tag model.
+
+        logger.critical("=" * 80)
+        logger.critical("Running migration: Individual tag persistence (update_28)")
+        logger.critical("Creating individual tag.json files")
+        logger.critical("=" * 80)
+
+        tags = self.data['settings']['application'].get('tags', {})
+        tag_count = len(tags)
+
+        if tag_count == 0:
+            logger.info("No tags found, skipping migration")
+            return
+
+        logger.info(f"Migrating {tag_count} tags to individual tag.json files...")
+
+        saved_count = 0
+        failed_count = 0
+
+        for uuid, tag_data in tags.items():
+            try:
+                tag_data.commit()
+                saved_count += 1
+                # Progress line after every 10th successfully saved tag
+                if saved_count % 10 == 0:
+                    logger.info(f"  Progress: {saved_count}/{tag_count} tags migrated...")
+
+            except Exception as e:
+                # Best effort: record the failure and carry on with the next tag
+                logger.error(f"Failed to save tag {uuid} ({tag_data.get('title', 'unknown')}): {e}")
+                failed_count += 1
+
+        # NOTE(review): failures only produce a warning here. run_updates() bumps
+        # schema_version once this method returns without raising, so tags that
+        # failed are presumably not retried by this migration on the next start -
+        # confirm that is intended.
+        if failed_count > 0:
+            logger.warning(f"Migration complete: {saved_count} tags saved, {failed_count} tags FAILED")
+        else:
+            logger.success(f"Migration complete: {saved_count} tags saved to individual tag.json files")
+
+        # Tags remain in settings for backwards compatibility AND easy access
+        # On next load, _load_tags() will read from tag.json files and merge with settings
+        logger.info("Tags saved to both settings AND individual tag.json files")
+        logger.info("Future tag edits will update both locations (dual storage)")
+        logger.critical("=" * 80)
+
@@ -308,10 +308,6 @@ def prepare_test_function(live_server, datastore_path):



-    # Prevent background thread from writing during cleanup/reload
-    datastore.needs_write = False
-    datastore.needs_write_urgent = False
-
    # CRITICAL: Clean up any files from previous tests
    # This ensures a completely clean directory
    cleanup(datastore_path)
@@ -344,7 +340,6 @@ def prepare_test_function(live_server, datastore_path):
                break

        datastore.data['watching'] = {}
-        datastore.needs_write = True
    except Exception as e:
        logger.warning(f"Error during datastore cleanup: {e}")

@@ -465,7 +465,10 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto

    assert res.status_code == 400, "Should get error 400 when we give a field that doesnt exist"
    # Message will come from `flask_expects_json`
-    assert b'Additional properties are not allowed' in res.data
+    # With patternProperties for processor_config_*, the error message format changed slightly
+    assert (b'Additional properties are not allowed' in res.data or
+            b'does not match any of the regexes' in res.data), \
+            "Should reject unknown fields with schema validation error"


    # Try a XSS URL
@@ -486,6 +489,7 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):

    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

+    # Test 1: Basic import with tag
    res = client.post(
        url_for("import") + "?tag=import-test",
        data='https://website1.com\r\nhttps://website2.com',
@@ -504,6 +508,209 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
    res = client.get(url_for('tags.tags_overview_page'))
    assert b'import-test' in res.data

+    # Test 2: Import with watch configuration fields (issue #3845)
+    # Test string field (include_filters), boolean (paused), and processor
+    import urllib.parse
+    params = urllib.parse.urlencode({
+        'tag': 'config-test',
+        'include_filters': 'div.content',
+        'paused': 'true',
+        'processor': 'text_json_diff',
+        'title': 'Imported with Config'
+    })
+
+    res = client.post(
+        url_for("import") + "?" + params,
+        data='https://website3.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 200
+    assert len(res.json) == 1
+    uuid = res.json[0]
+
+    # Verify the configuration was applied
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    assert watch['include_filters'] == ['div.content'], "include_filters should be set as array"
+    assert watch['paused'] == True, "paused should be True"
+    assert watch['processor'] == 'text_json_diff', "processor should be set"
+    assert watch['title'] == 'Imported with Config', "title should be set"
+
+    # Test 3: Import with array field (notification_urls) - using valid Apprise format
+    params = urllib.parse.urlencode({
+        'tag': 'notification-test',
+        'notification_urls': 'mailto://test@example.com,mailto://admin@example.com'
+    })
+
+    res = client.post(
+        url_for("import") + "?" + params,
+        data='https://website4.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 200
+    uuid = res.json[0]
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    assert 'mailto://test@example.com' in watch['notification_urls'], "notification_urls should contain first email"
+    assert 'mailto://admin@example.com' in watch['notification_urls'], "notification_urls should contain second email"
+
+    # Test 4: Import with object field (time_between_check)
+    import json
+    time_config = json.dumps({"hours": 2, "minutes": 30})
+    params = urllib.parse.urlencode({
+        'tag': 'schedule-test',
+        'time_between_check': time_config
+    })
+
+    res = client.post(
+        url_for("import") + "?" + params,
+        data='https://website5.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 200
+    uuid = res.json[0]
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    assert watch['time_between_check']['hours'] == 2, "time_between_check hours should be 2"
+    assert watch['time_between_check']['minutes'] == 30, "time_between_check minutes should be 30"
+
+    # Test 5: Import with invalid processor (should fail)
+    res = client.post(
+        url_for("import") + "?processor=invalid_processor",
+        data='https://website6.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 400, "Should reject invalid processor"
+    assert b"Invalid processor" in res.data, "Error message should mention invalid processor"
+
+    # Test 6: Import with invalid field (should fail)
+    res = client.post(
+        url_for("import") + "?unknown_field=value",
+        data='https://website7.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 400, "Should reject unknown field"
+    assert b"Unknown watch configuration parameter" in res.data, "Error message should mention unknown parameter"
+
+
+def test_api_import_small_synchronous(client, live_server, measure_memory_usage, datastore_path):
+    """A batch smaller than the background threshold is answered with 200 and the list of new UUIDs."""
+    from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
+
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    # Point the watches at the local test endpoint so nothing leaves the machine
+    test_url_base = url_for('test_endpoint', _external=True)
+
+    # At most 5 URLs, and always at least one below the background threshold
+    num_urls = min(5, IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD - 1)
+    url_lines = []
+    for i in range(num_urls):
+        url_lines.append(f'{test_url_base}?id=small-{i}')
+    payload = '\n'.join(url_lines)
+
+    import_endpoint = url_for("import") + "?tag=small-test"
+    res = client.post(
+        import_endpoint,
+        data=payload,
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    # Synchronous path: plain 200 with one UUID per submitted URL
+    assert res.status_code == 200, f"Should return 200 for small imports, got {res.status_code}"
+    assert isinstance(res.json, list), "Response should be a list of UUIDs"
+    assert len(res.json) == num_urls, f"Should return {num_urls} UUIDs, got {len(res.json)}"
+
+    # Every returned UUID must already be present in the datastore
+    for watch_uuid in res.json:
+        assert watch_uuid in live_server.app.config['DATASTORE'].data['watching'], \
+            f"Watch {watch_uuid} should exist immediately after synchronous import"
+
+    print(f"\n✓ Successfully created {num_urls} watches synchronously")
+
+
+def test_api_import_large_background(client, live_server, measure_memory_usage, datastore_path):
+    """Test that large imports (>= threshold) are processed in background thread
+
+    Posts IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD + 10 URLs, expects a 202 response whose
+    body mentions "background" and carries a 'count' equal to the number of URLs, then
+    polls the datastore until all of those watches exist and checks each one has the
+    'bulk-test' tag.
+    """
+    from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
+    import time
+
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    # Use local test endpoint to avoid network delays
+    test_url_base = url_for('test_endpoint', _external=True)
+
+    # Create URLs: threshold + 10 to trigger background processing
+    num_urls = IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD + 10
+    urls = '\n'.join([f'{test_url_base}?id=bulk-{i}' for i in range(num_urls)])
+
+    # Import large batch
+    res = client.post(
+        url_for("import") + "?tag=bulk-test",
+        data=urls,
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    # Should return 202 Accepted (background processing)
+    assert res.status_code == 202, f"Should return 202 for large imports, got {res.status_code}"
+    assert b"background" in res.data.lower(), "Response should mention background processing"
+
+    # Extract expected count from response
+    response_json = res.json
+    assert 'count' in response_json, "Response should include count"
+    assert response_json['count'] == num_urls, f"Count should be {num_urls}, got {response_json['count']}"
+
+    # Wait for background thread to complete (with timeout)
+    # NOTE: `elapsed` only accumulates the sleep intervals, so the real wall-clock
+    # time (including the counting below) can exceed max_wait.
+    max_wait = 10  # seconds
+    wait_interval = 0.5
+    elapsed = 0
+    watches_created = 0
+
+    while elapsed < max_wait:
+        time.sleep(wait_interval)
+        elapsed += wait_interval
+
+        # Count how many watches have been created
+        # (identified by the 'id=bulk-' marker placed in each URL above)
+        watches_created = len([
+            uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
+            if 'id=bulk-' in watch['url']
+        ])
+
+        if watches_created == num_urls:
+            break
+
+    # Verify all watches were created
+    assert watches_created == num_urls, \
+        f"Expected {num_urls} watches to be created, but found {watches_created} after {elapsed}s"
+
+    # Verify watches have correct configuration
+    # (same filter as the polling loop, collecting the watch objects this time)
+    bulk_watches = [
+        watch for watch in live_server.app.config['DATASTORE'].data['watching'].values()
+        if 'id=bulk-' in watch['url']
+    ]
+
+    assert len(bulk_watches) == num_urls, "All bulk watches should exist"
+
+    # Check that they have the correct tag
+    datastore = live_server.app.config['DATASTORE']
+    # Get UUIDs of bulk watches by filtering the datastore keys
+    bulk_watch_uuids = [
+        uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
+        if 'id=bulk-' in watch['url']
+    ]
+    for watch_uuid in bulk_watch_uuids:
+        # The result is used as a mapping whose values expose a 'title' key
+        tags = datastore.get_all_tags_for_watch(uuid=watch_uuid)
+        tag_names = [t['title'] for t in tags.values()]
+        assert 'bulk-test' in tag_names, f"Watch {watch_uuid} should have 'bulk-test' tag"
+
+    print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
+
+
 def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):


@@ -80,7 +80,10 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m
    # Should get 400 error due to invalid field (this will be caught by internal validation)
    # Note: This tests the flow where OpenAPI validation passes but internal validation catches it
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"Additional properties are not allowed" in res.data, "Should contain validation error about additional properties"
+    # With patternProperties for processor_config_*, the error message format changed slightly
+    assert (b"Additional properties are not allowed" in res.data or
+            b"does not match any of the regexes" in res.data), \
+            "Should contain validation error about additional/invalid properties"


 def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
@@ -18,7 +18,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
        url_for("tags"),
        headers={'x-api-key': api_key}
    )
-    assert res.text.strip() == "{}", "Should be empty list"
+    assert res.get_data(as_text=True).strip() == "{}", "Should be empty list"
    assert res.status_code == 200

    res = client.post(
@@ -36,7 +36,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
        headers={'x-api-key': api_key}
    )
    assert res.status_code == 200
-    assert new_tag_uuid in res.text
+    assert new_tag_uuid in res.get_data(as_text=True)
    assert res.json[new_tag_uuid]['title'] == tag_title
    assert res.json[new_tag_uuid]['notification_muted'] == False

@@ -118,6 +118,16 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
    assert res.status_code == 200
    assert new_tag_uuid in res.json.get('tags', [])

+    # Test that tags are returned when listing ALL watches (issue #3854)
+    res = client.get(
+        url_for("createwatch"),  # GET /api/v1/watch - list all watches
+        headers={'x-api-key': api_key}
+    )
+    assert res.status_code == 200
+    assert watch_uuid in res.json, "Watch should be in the list"
+    assert 'tags' in res.json[watch_uuid], "Tags field should be present in watch list"
+    assert new_tag_uuid in res.json[watch_uuid]['tags'], "Tag UUID should be in tags array"
+
    # Check recheck by tag
    before_check_time = live_server.app.config['DATASTORE'].data['watching'][watch_uuid].get('last_checked')
    time.sleep(1)
@@ -148,7 +158,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
        headers={'x-api-key': api_key}
    )
    assert res.status_code == 200
-    assert new_tag_uuid not in res.text
+    assert new_tag_uuid not in res.get_data(as_text=True)

    # Verify tag was removed from watch
    res = client.get(
@@ -0,0 +1,661 @@
+#!/usr/bin/env python3
+"""
+Tests for immediate commit-based persistence system.
+
+Tests cover:
+- Watch.commit() persistence to disk
+- Concurrent commit safety (race conditions)
+- Processor config separation
+- Data loss prevention (settings, tags, watch modifications)
+"""
+
+import json
+import os
+import threading
+import time
+from flask import url_for
+from .util import wait_for_all_checks
+
+
+# ==============================================================================
+# 2. Commit() Persistence Tests
+# ==============================================================================
+
+def test_watch_commit_persists_to_disk(client, live_server):
+    """A committed watch must be readable straight from its watch.json file."""
+    store = client.application.config.get('DATASTORE')
+
+    # Register a watch, then change two fields and flush them with commit()
+    watch_id = store.add_watch(url='http://example.com', extras={'title': 'Original Title'})
+    entry = store.data['watching'][watch_id]
+    entry['title'] = 'Modified Title'
+    entry['paused'] = True
+    entry.commit()
+
+    # Inspect the file itself rather than the in-memory datastore
+    json_file = os.path.join(entry.data_dir, 'watch.json')
+    assert os.path.exists(json_file), "watch.json should exist on disk"
+
+    with open(json_file, 'r') as handle:
+        on_disk = json.load(handle)
+
+    assert on_disk['title'] == 'Modified Title', "Title should be persisted to disk"
+    assert on_disk['paused'] == True, "Paused state should be persisted to disk"
+    assert on_disk['uuid'] == watch_id, "UUID should match"
+
+
+def test_watch_commit_survives_reload(client, live_server):
+    """Committed watch fields must still be there after a second datastore loads them."""
+    from changedetectionio.store import ChangeDetectionStore
+
+    store = client.application.config.get('DATASTORE')
+    store_dir = store.datastore_path
+
+    # Create a watch, set several fields and commit them
+    watch_id = store.add_watch(url='http://example.com', extras={'title': 'Test Watch'})
+    entry = store.data['watching'][watch_id]
+    entry['title'] = 'Persisted Title'
+    entry['paused'] = True
+    entry['tags'] = ['tag-1', 'tag-2']
+    entry.commit()
+
+    # Stand in for an app restart: a second store instance over the same path
+    restarted = ChangeDetectionStore(datastore_path=store_dir)
+    restarted.reload_state(
+        datastore_path=store_dir,
+        include_default_watches=False,
+        version_tag='test'
+    )
+
+    # The reloaded watch must carry the committed values
+    assert watch_id in restarted.data['watching'], "Watch should exist after reload"
+    reloaded = restarted.data['watching'][watch_id]
+    assert reloaded['title'] == 'Persisted Title', "Title should survive reload"
+    assert reloaded['paused'] == True, "Paused state should survive reload"
+    assert reloaded['tags'] == ['tag-1', 'tag-2'], "Tags should survive reload"
+
+
+def test_watch_commit_atomic_on_crash(client, live_server):
+    """watch.json must parse as valid JSON after each of two consecutive commits.
+
+    No crash or interruption is injected here; the test only checks that the
+    file holds the most recently committed title after every commit.
+    """
+    store = client.application.config.get('DATASTORE')
+
+    watch_id = store.add_watch(url='http://example.com', extras={'title': 'Original'})
+    entry = store.data['watching'][watch_id]
+
+    for expected_title in ('First Save', 'Second Save'):
+        entry['title'] = expected_title
+        entry.commit()
+
+        # json.load raises JSONDecodeError if the file had been left corrupted
+        json_file = os.path.join(entry.data_dir, 'watch.json')
+        with open(json_file, 'r') as handle:
+            on_disk = json.load(handle)
+            assert on_disk['title'] == expected_title
+
+
+def test_multiple_watches_commit_independently(client, live_server):
+    """Committing one watch must leave the watch.json of the other watches unchanged.
+
+    Three watches are created, only the second is modified and committed, and
+    then the watch.json of each one is read back from disk.
+    """
+    datastore = client.application.config.get('DATASTORE')
+
+    # Create multiple watches
+    uuid1 = datastore.add_watch(url='http://example1.com', extras={'title': 'Watch 1'})
+    uuid2 = datastore.add_watch(url='http://example2.com', extras={'title': 'Watch 2'})
+    uuid3 = datastore.add_watch(url='http://example3.com', extras={'title': 'Watch 3'})
+
+    # Modify and commit only watch2 (the other two are only ever read from disk)
+    watch2 = datastore.data['watching'][uuid2]
+    watch2['title'] = 'Modified Watch 2'
+    watch2['paused'] = True
+    watch2.commit()
+
+    def read_watch_json(uuid):
+        """Load the watch.json stored in the data directory of watch `uuid`."""
+        watch = datastore.data['watching'][uuid]
+        path = os.path.join(watch.data_dir, 'watch.json')
+        with open(path, 'r') as f:
+            return json.load(f)
+
+    data1 = read_watch_json(uuid1)
+    data2 = read_watch_json(uuid2)
+    data3 = read_watch_json(uuid3)
+
+    # Only watch2 should have changes
+    assert data1['title'] == 'Watch 1', "Watch 1 should be unchanged"
+    assert data1['paused'] == False, "Watch 1 should not be paused"
+
+    assert data2['title'] == 'Modified Watch 2', "Watch 2 should be modified"
+    assert data2['paused'] == True, "Watch 2 should be paused"
+
+    assert data3['title'] == 'Watch 3', "Watch 3 should be unchanged"
+    assert data3['paused'] == False, "Watch 3 should not be paused"
+
+
+# ==============================================================================
+# 3. Concurrency/Race Condition Tests
+# ==============================================================================
+
+def test_concurrent_watch_commits_dont_corrupt(client, live_server):
+    """Ten threads set the title of one watch and commit it; watch.json must stay valid JSON.
+
+    Passes when no thread raised and the file on disk still parses, carries the
+    right UUID and holds one of the written titles.
+    """
+    datastore = client.application.config.get('DATASTORE')
+
+    uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
+    watch = datastore.data['watching'][uuid]
+
+    # Exceptions raised inside worker threads are collected here so the main
+    # thread can assert on them after joining
+    errors = []
+
+    def modify_and_commit(field, value):
+        # Worker body: write one field on the shared watch and commit it
+        try:
+            watch[field] = value
+            watch.commit()
+        except Exception as e:
+            errors.append(e)
+
+    # Run 10 concurrent commits
+    threads = []
+    for i in range(10):
+        t = threading.Thread(target=modify_and_commit, args=('title', f'Title {i}'))
+        threads.append(t)
+        t.start()
+
+    for t in threads:
+        t.join()
+
+    # Should not have any errors
+    assert len(errors) == 0, f"Expected no errors, got: {errors}"
+
+    # JSON file should still be valid (not corrupted)
+    watch_json_path = os.path.join(watch.data_dir, 'watch.json')
+    with open(watch_json_path, 'r') as f:
+        data = json.load(f)  # Should not raise JSONDecodeError
+        assert data['uuid'] == uuid, "UUID should still be correct"
+        # Which thread committed last is not fixed, so only the shared prefix is checked
+        assert 'Title' in data['title'], "Title should contain 'Title'"
+
+
+def test_concurrent_modifications_during_commit(client, live_server):
+    """Test that modifying watch during commit doesn't cause RuntimeError
+
+    One thread keeps rewriting 'title' and 'paused' on a watch while a second
+    thread commits that same watch 20 times.
+    """
+    datastore = client.application.config.get('DATASTORE')
+
+    uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
+    watch = datastore.data['watching'][uuid]
+
+    # (source, exception) pairs recorded by the two worker threads
+    errors = []
+    # Set by the main thread once the committer is done, to end the modifier loop
+    stop_flag = threading.Event()
+
+    def keep_modifying():
+        """Rewrite title and paused on the watch until stop_flag is set"""
+        try:
+            i = 0
+            while not stop_flag.is_set():
+                watch['title'] = f'Title {i}'
+                watch['paused'] = i % 2 == 0
+                i += 1
+                time.sleep(0.001)
+        except Exception as e:
+            errors.append(('modifier', e))
+
+    def keep_committing():
+        """Commit the watch 20 times with a short pause between commits"""
+        try:
+            for _ in range(20):
+                watch.commit()
+                time.sleep(0.005)
+        except Exception as e:
+            errors.append(('committer', e))
+
+    # Start concurrent modification and commits
+    modifier = threading.Thread(target=keep_modifying)
+    committer = threading.Thread(target=keep_committing)
+
+    modifier.start()
+    committer.start()
+
+    # The committer runs a fixed number of rounds; the modifier is stopped after it
+    committer.join()
+    stop_flag.set()
+    modifier.join()
+
+    # Should not have RuntimeError from dict changing during iteration
+    # NOTE(review): exceptions of any other type are collected in `errors` but
+    # never asserted on, so they do not fail this test - confirm that is intended
+    runtime_errors = [e for source, e in errors if isinstance(e, RuntimeError)]
+    assert len(runtime_errors) == 0, f"Should not have RuntimeError, got: {runtime_errors}"
+
+
+def test_datastore_lock_protects_commit_snapshot(client, live_server):
+    """Three threads each commit the same watch 50 times; all 150 commits must succeed.
+
+    The watch carries nested browser_steps data so every commit has a nested
+    structure to write out. Afterwards watch.json must still be valid JSON
+    with the right UUID.
+    NOTE(review): presumably commit() snapshots the watch under datastore.lock;
+    that locking is not visible from this test - confirm against the model code.
+    """
+    datastore = client.application.config.get('DATASTORE')
+
+    uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
+    watch = datastore.data['watching'][uuid]
+
+    # Add some complex nested data
+    watch['browser_steps'] = [
+        {'operation': 'click', 'selector': '#foo'},
+        {'operation': 'wait', 'seconds': 5}
+    ]
+
+    errors = []
+    # One entry is appended per successful commit. list.append is atomic in
+    # CPython, whereas a shared `counter[0] += 1` is a read-modify-write that
+    # can lose updates when threads interleave and make the total below flaky.
+    commits_succeeded = []
+
+    def rapid_commits():
+        # Worker body: 50 title changes, each followed by a commit
+        try:
+            for i in range(50):
+                watch['title'] = f'Title {i}'
+                watch.commit()
+                commits_succeeded.append(i)
+                time.sleep(0.001)
+        except Exception as e:
+            errors.append(e)
+
+    # Multiple threads doing rapid commits
+    threads = [threading.Thread(target=rapid_commits) for _ in range(3)]
+
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+
+    assert len(errors) == 0, f"Expected no errors, got: {errors}"
+    assert len(commits_succeeded) == 150, f"Expected 150 commits, got {len(commits_succeeded)}"
+
+    # Final JSON should be valid
+    watch_json_path = os.path.join(watch.data_dir, 'watch.json')
+    with open(watch_json_path, 'r') as f:
+        data = json.load(f)
+        assert data['uuid'] == uuid
+
+
+# ==============================================================================
+# 4. Processor Config Separation Tests
+# ==============================================================================
+
+def test_processor_config_never_in_watch_json(client, live_server):
+    """Keys starting with processor_config_ must not be written to watch.json."""
+    store = client.application.config.get('DATASTORE')
+
+    watch_id = store.add_watch(
+        url='http://example.com',
+        extras={'title': 'Test Watch', 'processor': 'restock_diff'}
+    )
+    entry = store.data['watching'][watch_id]
+
+    # Put prefixed fields directly on the watch, then commit
+    entry['processor_config_price_threshold'] = 10.0
+    entry['processor_config_some_setting'] = 'value'
+    entry['processor_config_another'] = {'nested': 'data'}
+    entry.commit()
+
+    # Load what actually reached the disk
+    json_file = os.path.join(entry.data_dir, 'watch.json')
+    with open(json_file, 'r') as handle:
+        on_disk = json.load(handle)
+
+    # None of the prefixed keys may appear in the file
+    for key in on_disk:
+        assert not key.startswith('processor_config_'), \
+            f"Found {key} in watch.json - processor configs should be in separate file!"
+
+    # Ordinary fields are still written
+    assert on_disk['title'] == 'Test Watch'
+    assert on_disk['processor'] == 'restock_diff'
+
+
+def test_api_post_saves_processor_config_separately(client, live_server):
+    """Test that API POST saves processor configs to {processor}.json
+
+    A watch is created through the API with two processor_config_* fields.
+    The test then expects restock_diff.json in the watch's data directory to
+    hold those values under their un-prefixed names.
+    """
+    # Get API key
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    # Create watch via API with processor config
+    response = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            'url': 'http://example.com',
+            'processor': 'restock_diff',
+            'processor_config_price_threshold': 10.0,
+            'processor_config_in_stock_only': True
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key}
+    )
+
+    assert response.status_code in (200, 201), f"Expected 200/201, got {response.status_code}"
+    uuid = response.json.get('uuid')
+    assert uuid, "Should return UUID"
+
+    datastore = client.application.config.get('DATASTORE')
+    watch = datastore.data['watching'][uuid]
+
+    # Check that processor config file exists
+    processor_config_path = os.path.join(watch.data_dir, 'restock_diff.json')
+    assert os.path.exists(processor_config_path), "Processor config file should exist"
+
+    with open(processor_config_path, 'r') as f:
+        config = json.load(f)
+
+    # Verify fields are saved WITHOUT processor_config_ prefix
+    assert config.get('price_threshold') == 10.0, "Should have price_threshold (no prefix)"
+    assert config.get('in_stock_only') == True, "Should have in_stock_only (no prefix)"
+    assert 'processor_config_price_threshold' not in config, "Should NOT have prefixed keys"
+
+
+def test_api_put_saves_processor_config_separately(client, live_server):
+    """An API PUT carrying processor_config_* fields must land in {processor}.json."""
+    import json
+    store = client.application.config.get('DATASTORE')
+
+    # API key used to authenticate the PUT
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    # Watch that uses the restock_diff processor
+    watch_id = store.add_watch(
+        url='http://example.com',
+        extras={'processor': 'restock_diff'}
+    )
+
+    # Send the processor settings through the API
+    payload = json.dumps({
+        'processor_config_price_threshold': 15.0,
+        'processor_config_min_stock': 5
+    })
+    response = client.put(
+        url_for("watch", uuid=watch_id),
+        data=payload,
+        headers={'content-type': 'application/json', 'x-api-key': api_key}
+    )
+
+    # Either 200 or 204 counts as success for a PUT
+    assert response.status_code in (200, 204), f"Expected 200/204, got {response.status_code}: {response.data}"
+
+    entry = store.data['watching'][watch_id]
+
+    # The settings are expected in the processor's own JSON file
+    config_file = os.path.join(entry.data_dir, 'restock_diff.json')
+    assert os.path.exists(config_file), "Processor config file should exist"
+
+    with open(config_file, 'r') as handle:
+        saved = json.load(handle)
+
+    assert saved.get('price_threshold') == 15.0, "Should have updated price_threshold"
+    assert saved.get('min_stock') == 5, "Should have min_stock"
+
+
+def test_ui_edit_saves_processor_config_separately(client, live_server):
+    """processor_config_* keys set directly on a watch must be dropped from watch.json.
+
+    The fields are assigned on the watch object itself (no UI request is made);
+    other fields set the same way must still be written.
+    """
+    store = client.application.config.get('DATASTORE')
+
+    watch_id = store.add_watch(
+        url='http://example.com',
+        extras={'processor': 'text_json_diff', 'title': 'Test'}
+    )
+    entry = store.data['watching'][watch_id]
+
+    # Two prefixed fields that must be filtered, one ordinary field that must not
+    entry['processor_config_should_not_save'] = 'test_value'
+    entry['processor_config_another_field'] = 123
+    entry['normal_field'] = 'this_should_save'
+    entry.commit()
+
+    json_file = os.path.join(entry.data_dir, 'watch.json')
+    with open(json_file, 'r') as handle:
+        on_disk = json.load(handle)
+
+    # Main point of the test: no prefixed key reached watch.json
+    for key in on_disk:
+        assert not key.startswith('processor_config_'), \
+            f"Found {key} in watch.json - processor configs should be filtered during commit"
+
+    # Ordinary fields are unaffected by the filtering
+    assert on_disk['normal_field'] == 'this_should_save', "Normal fields should save"
+    assert on_disk['title'] == 'Test', "Original fields should still be there"
+
+
+def test_browser_steps_normalized_to_empty_list(client, live_server):
+    """browser_steps holding only placeholder entries must be written to disk as []."""
+    store = client.application.config.get('DATASTORE')
+
+    watch_id = store.add_watch(url='http://example.com')
+    entry = store.data['watching'][watch_id]
+
+    # Steps that carry no real action
+    placeholder_steps = [
+        {'operation': 'Choose one', 'selector': ''},
+        {'operation': 'Goto site', 'selector': ''},
+        {'operation': '', 'selector': '#foo'}
+    ]
+    entry['browser_steps'] = placeholder_steps
+    entry.commit()
+
+    # Check the value that was actually persisted
+    json_file = os.path.join(entry.data_dir, 'watch.json')
+    with open(json_file, 'r') as handle:
+        on_disk = json.load(handle)
+
+    assert on_disk['browser_steps'] == [], "Meaningless browser_steps should be normalized to []"
+
+
+# ==============================================================================
+# 5. Data Loss Prevention Tests
+# ==============================================================================
+
+def test_settings_persist_after_update(client, live_server):
+    """Settings changed in memory and committed must be seen by a second datastore."""
+    from changedetectionio.store import ChangeDetectionStore
+
+    store = client.application.config.get('DATASTORE')
+    store_dir = store.datastore_path
+
+    # Write the settings straight into the datastore (no form involved), then commit
+    store.data['settings']['application']['empty_pages_are_a_change'] = True
+    store.data['settings']['application']['fetch_backend'] = 'html_requests'
+    store.data['settings']['requests']['time_between_check']['minutes'] = 120
+    store.commit()
+
+    # Stand in for an app restart: a second store instance over the same path
+    restarted = ChangeDetectionStore(datastore_path=store_dir)
+    restarted.reload_state(
+        datastore_path=store_dir,
+        include_default_watches=False,
+        version_tag='test'
+    )
+
+    # All three values must have come back from disk
+    reloaded = restarted.data['settings']
+    assert reloaded['application']['empty_pages_are_a_change'] == True, "empty_pages_are_a_change should persist"
+    assert reloaded['application']['fetch_backend'] == 'html_requests', "fetch_backend should persist"
+    assert reloaded['requests']['time_between_check']['minutes'] == 120, "time_between_check should persist"
+
+
+def test_tag_mute_persists(client, live_server):
+    """Muting a tag through the tags.mute route must still hold after a datastore reload."""
+    from changedetectionio.store import ChangeDetectionStore
+
+    store = client.application.config.get('DATASTORE')
+    store_dir = store.datastore_path
+
+    # New tag to operate on
+    tag_id = store.add_tag('Test Tag')
+
+    # The mute route answers with a redirect
+    response = client.get(url_for("tags.mute", uuid=tag_id))
+    assert response.status_code == 302  # Redirect
+
+    # In-memory state first
+    assert store.data['settings']['application']['tags'][tag_id]['notification_muted'] == True
+
+    # Stand in for an app restart: a second store instance over the same path
+    restarted = ChangeDetectionStore(datastore_path=store_dir)
+    restarted.reload_state(
+        datastore_path=store_dir,
+        include_default_watches=False,
+        version_tag='test'
+    )
+
+    # The tag and its muted flag must both be present after the reload
+    assert tag_id in restarted.data['settings']['application']['tags']
+    assert restarted.data['settings']['application']['tags'][tag_id]['notification_muted'] == True
+
+
+def test_tag_delete_removes_from_watches(client, live_server):
+    """Deleting a tag must strip it from every watch.json while leaving other tags alone."""
+    store = client.application.config.get('DATASTORE')
+
+    tag_id = store.add_tag('Test Tag')
+
+    # Three watches: tag only, tag plus another tag, and no tags at all
+    first_id = store.add_watch(url='http://example1.com')
+    second_id = store.add_watch(url='http://example2.com')
+    third_id = store.add_watch(url='http://example3.com')
+
+    first = store.data['watching'][first_id]
+    first['tags'] = [tag_id]
+    first.commit()
+
+    second = store.data['watching'][second_id]
+    second['tags'] = [tag_id, 'other-tag']
+    second.commit()
+
+    # Delete the tag through its route, which answers with a redirect
+    response = client.get(url_for("tags.delete", uuid=tag_id))
+    assert response.status_code == 302
+
+    # Give the background thread time to finish
+    time.sleep(1)
+
+    # The tag is gone from the settings
+    assert tag_id not in store.data['settings']['application']['tags']
+
+    def tags_on_disk(uuid):
+        """Return the 'tags' value stored in the watch.json of watch `uuid`."""
+        json_file = os.path.join(store.data['watching'][uuid].data_dir, 'watch.json')
+        with open(json_file, 'r') as handle:
+            return json.load(handle)['tags']
+
+    # And gone from the persisted watches, without touching unrelated tags
+    assert tag_id not in tags_on_disk(first_id), "Tag should be removed from watch1"
+    assert tag_id not in tags_on_disk(second_id), "Tag should be removed from watch2"
+    assert 'other-tag' in tags_on_disk(second_id), "Other tags should remain in watch2"
+    assert tags_on_disk(third_id) == [], "Watch3 should still have empty tags"
+
+
+def test_watch_pause_unpause_persists(client, live_server):
+    """Pausing and unpausing a watch through the API must be written to watch.json."""
+    store = client.application.config.get('DATASTORE')
+
+    # API key for the requests below
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    watch_id = store.add_watch(url='http://example.com')
+    entry = store.data['watching'][watch_id]
+
+    def paused_on_disk():
+        """Read the 'paused' value straight from the watch.json file."""
+        json_file = os.path.join(entry.data_dir, 'watch.json')
+        with open(json_file, 'r') as handle:
+            return json.load(handle)['paused']
+
+    # Pause, then check the file
+    response = client.get(url_for("watch", uuid=watch_id, paused='paused'), headers={'x-api-key': api_key})
+    assert response.status_code == 200
+    assert paused_on_disk() == True, "Pause should be persisted"
+
+    # Unpause, then check the file again
+    response = client.get(url_for("watch", uuid=watch_id, paused='unpaused'), headers={'x-api-key': api_key})
+    assert response.status_code == 200
+    assert paused_on_disk() == False, "Unpause should be persisted"
+
+
+def test_watch_mute_unmute_persists(client, live_server):
+    """Muting and unmuting a watch through the API must be written to watch.json."""
+    store = client.application.config.get('DATASTORE')
+
+    # API key for the requests below
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    watch_id = store.add_watch(url='http://example.com')
+    entry = store.data['watching'][watch_id]
+
+    def muted_on_disk():
+        """Read the 'notification_muted' value straight from the watch.json file."""
+        json_file = os.path.join(entry.data_dir, 'watch.json')
+        with open(json_file, 'r') as handle:
+            return json.load(handle)['notification_muted']
+
+    # Mute, then check the file
+    response = client.get(url_for("watch", uuid=watch_id, muted='muted'), headers={'x-api-key': api_key})
+    assert response.status_code == 200
+    assert muted_on_disk() == True, "Mute should be persisted"
+
+    # Unmute, then check the file again
+    response = client.get(url_for("watch", uuid=watch_id, muted='unmuted'), headers={'x-api-key': api_key})
+    assert response.status_code == 200
+    assert muted_on_disk() == False, "Unmute should be persisted"
+
+
+def test_ui_watch_edit_persists_all_fields(client, live_server):
+    """Fields saved through the watch edit form must be present after a datastore reload."""
+    from changedetectionio.store import ChangeDetectionStore
+
+    store = client.application.config.get('DATASTORE')
+    store_dir = store.datastore_path
+
+    watch_id = store.add_watch(url='http://example.com')
+
+    # Submit the edit form with several fields changed at once
+    form_fields = {
+        'url': 'http://updated-example.com',
+        'title': 'Updated Watch Title',
+        'time_between_check-hours': '2',
+        'time_between_check-minutes': '30',
+        'include_filters': '#content',
+        'fetch_backend': 'html_requests',
+        'method': 'POST',
+        'ignore_text': 'Advertisement\nTracking'
+    }
+    response = client.post(
+        url_for("ui.ui_edit.edit_page", uuid=watch_id),
+        data=form_fields,
+        follow_redirects=True
+    )
+
+    assert b"Updated watch" in response.data or b"Saved" in response.data
+
+    # Stand in for an app restart: a second store instance over the same path
+    restarted = ChangeDetectionStore(datastore_path=store_dir)
+    restarted.reload_state(
+        datastore_path=store_dir,
+        include_default_watches=False,
+        version_tag='test'
+    )
+
+    # Compare the reloaded watch against what was submitted
+    # NOTE(review): include_filters and ignore_text are submitted above but are not asserted here
+    reloaded = restarted.data['watching'][watch_id]
+    assert reloaded['url'] == 'http://updated-example.com'
+    assert reloaded['title'] == 'Updated Watch Title'
+    assert reloaded['time_between_check']['hours'] == 2
+    assert reloaded['time_between_check']['minutes'] == 30
+    assert reloaded['fetch_backend'] == 'html_requests'
+    assert reloaded['method'] == 'POST'
@@ -5,6 +5,8 @@ from flask import url_for
 from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
 import os

+from ..store import ChangeDetectionStore
+

 # def test_setup(client, live_server, measure_memory_usage, datastore_path):
   #  live_server_setup(live_server) # Setup on conftest per function
@@ -474,3 +476,143 @@ the {test} appeared before. {test in res.data[:n]=}
        n += t_index + len(test)

    delete_all_watches(client)
+
+
+def test_tag_json_persistence(client, live_server, measure_memory_usage, datastore_path):
+    """
+    Test that tags are saved to individual tag.json files and loaded correctly.
+
+    This test walks one tag through its whole life cycle and checks that:
+    - Creating the tag writes {datastore_path}/{uuid}/tag.json
+    - Editing the tag updates tag.json
+    - changedetection.json keeps an empty 'tags' dict
+    - A second ChangeDetectionStore built on the same path loads the tag
+    - Deleting the tag removes the tag.json file
+    """
+    import json
+
+    datastore = client.application.config.get('DATASTORE')
+
+    # 1. Create a tag
+    res = client.post(
+        url_for("tags.form_tag_add"),
+        data={"name": "persistence-test-tag"},
+        follow_redirects=True
+    )
+    assert b"Tag added" in res.data
+
+    tag_uuid = get_UUID_for_tag_name(client, name="persistence-test-tag")
+    assert tag_uuid, "Tag UUID should exist"
+
+    # 2. Verify tag.json file was created
+    tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
+    assert os.path.exists(tag_json_path), f"tag.json should exist at {tag_json_path}"
+
+    # 3. Verify tag.json contains correct data
+    with open(tag_json_path, 'r') as f:
+        tag_data = json.load(f)
+    assert tag_data['title'] == 'persistence-test-tag'
+    assert tag_data['uuid'] == tag_uuid
+    assert 'date_created' in tag_data
+
+    # 4. Edit the tag
+    res = client.post(
+        url_for("tags.form_tag_edit_submit", uuid=tag_uuid),
+        data={
+            "name": "persistence-test-tag",
+            "notification_muted": True,
+            "include_filters": '#test-filter'
+        },
+        follow_redirects=True
+    )
+    assert b"Updated" in res.data
+
+    # 5. Verify tag.json was updated
+    with open(tag_json_path, 'r') as f:
+        tag_data = json.load(f)
+    assert tag_data['notification_muted'] == True
+    assert '#test-filter' in tag_data.get('include_filters', [])
+
+    # 5a. Verify tag is NOT in changedetection.json (tags should be in tag.json only)
+    changedetection_json_path = os.path.join(datastore_path, "changedetection.json")
+    with open(changedetection_json_path, 'r') as f:
+        settings_data = json.load(f)
+    # Tags dict should be empty in settings (all tags are in individual files)
+    assert settings_data['settings']['application']['tags'] == {}, \
+        "Tags should NOT be saved to changedetection.json (should be empty dict)"
+
+    # 6. Simulate restart - build a second datastore on the same path
+    datastore2 = ChangeDetectionStore(datastore_path=datastore_path, include_default_watches=False, version_tag='test')
+
+    # 7. Verify tag was loaded from tag.json
+    assert tag_uuid in datastore2.data['settings']['application']['tags']
+    loaded_tag = datastore2.data['settings']['application']['tags'][tag_uuid]
+    assert loaded_tag['title'] == 'persistence-test-tag'
+    assert loaded_tag['notification_muted'] == True
+    assert '#test-filter' in loaded_tag.get('include_filters', [])
+
+    # 8. Delete the tag via the tags.delete route (plain GET, redirects followed, no API key sent)
+    res = client.get(url_for("tags.delete", uuid=tag_uuid), follow_redirects=True)
+    assert b"Tag deleted" in res.data
+
+    # 9. Verify tag.json file was deleted
+    assert not os.path.exists(tag_json_path), f"tag.json should be deleted at {tag_json_path}"
+
+    # 10. Verify tag is removed from settings (checked on the app's datastore, not datastore2)
+    assert tag_uuid not in datastore.data['settings']['application']['tags']
+
+    delete_all_watches(client)
+
+
+def test_tag_json_migration_update_27(client, live_server, measure_memory_usage, datastore_path):
+    """
+    Test that several tags are stored in individual tag.json files and reload correctly.
+
+    Three tags are created through the add-tag form. The test checks that each
+    has a tag.json, that changedetection.json holds an empty 'tags' dict, that a
+    second ChangeDetectionStore on the same path loads all of them, and that
+    deleting all tags removes the files.
+
+    NOTE(review): no pre-update_27 datastore is constructed here, so the
+    migration step itself is not exercised by this test.
+    """
+    import json
+
+    # 1. Create multiple tags
+    tag_names = ['migration-tag-1', 'migration-tag-2', 'migration-tag-3']
+    tag_uuids = []
+
+    for tag_name in tag_names:
+        res = client.post(
+            url_for("tags.form_tag_add"),
+            data={"name": tag_name},
+            follow_redirects=True
+        )
+        assert b"Tag added" in res.data
+        tag_uuid = get_UUID_for_tag_name(client, name=tag_name)
+        tag_uuids.append(tag_uuid)
+
+    # 2. Verify all tag.json files exist right after the tags were added through the form
+    for tag_uuid in tag_uuids:
+        tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
+        assert os.path.exists(tag_json_path), f"tag.json should exist for {tag_uuid}"
+
+    # 2a. Verify tags are NOT in changedetection.json
+    changedetection_json_path = os.path.join(datastore_path, "changedetection.json")
+    with open(changedetection_json_path, 'r') as f:
+        settings_data = json.load(f)
+    assert settings_data['settings']['application']['tags'] == {}, \
+        "Tags should NOT be in changedetection.json after migration"
+
+    # 3. Simulate restart - build a second datastore on the same path
+    datastore2 = ChangeDetectionStore(datastore_path=datastore_path, include_default_watches=False, version_tag='test')
+
+    # 4. Verify all tags loaded from tag.json files (tag_uuids and tag_names share the same order)
+    for idx, tag_uuid in enumerate(tag_uuids):
+        assert tag_uuid in datastore2.data['settings']['application']['tags']
+        loaded_tag = datastore2.data['settings']['application']['tags'][tag_uuid]
+        assert loaded_tag['title'] == tag_names[idx]
+
+    # Cleanup
+    res = client.get(url_for("tags.delete_all"), follow_redirects=True)
+    assert b'All tags deleted' in res.data
+
+    # Verify all tag.json files were deleted
+    for tag_uuid in tag_uuids:
+        tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
+        assert not os.path.exists(tag_json_path), f"tag.json should be deleted for {tag_uuid}"
+
+    delete_all_watches(client)
@@ -100,11 +100,11 @@ class TestDiffBuilder(unittest.TestCase):
        # Test 1: Deepcopy shares datastore reference (doesn't copy it)
        watch_copy = deepcopy(watches[0])

-        self.assertIsNotNone(watch_copy._model__datastore,
+        self.assertIsNotNone(watch_copy._datastore,
                            "__datastore should exist in copied watch")
-        self.assertIs(watch_copy._model__datastore, watches[0]._model__datastore,
+        self.assertIs(watch_copy._datastore, watches[0]._datastore,
                     "__datastore should be SHARED (same object), not copied")
-        self.assertIs(watch_copy._model__datastore, mock_datastore,
+        self.assertIs(watch_copy._datastore, mock_datastore,
                     "__datastore should reference the original datastore")

        # Test 2: Dict data is properly copied (not shared)
@@ -130,7 +130,7 @@ class TestDiffBuilder(unittest.TestCase):

        # All copies should share the same datastore
        for copy in copies:
-            self.assertIs(copy._model__datastore, mock_datastore,
+            self.assertIs(copy._datastore, mock_datastore,
                         "All copies should share the original datastore")

    def test_watch_pickle_doesnt_serialize_datastore(self):
@@ -160,7 +160,7 @@ class TestDiffBuilder(unittest.TestCase):
                        "Dict data should be preserved after pickle/unpickle")

         # Test 2: _datastore is NOT serialized (attribute shouldn't exist after unpickle)
-        self.assertFalse(hasattr(unpickled_watch, '_model__datastore'),
+        self.assertFalse(hasattr(unpickled_watch, '_datastore'),
                         "__datastore attribute should not exist after unpickle (not serialized)")

        # Test 3: Pickled data shouldn't contain the large datastore object
@@ -208,8 +208,8 @@ class TestDiffBuilder(unittest.TestCase):
                           "Modifying copy should not affect original")

        # Test 5: Tag with datastore shares it (doesn't copy it)
-        if hasattr(tag_with_ds, '_model__datastore'):
-            self.assertIs(tag_copy2._model__datastore, tag_with_ds._model__datastore,
+        if hasattr(tag_with_ds, '_datastore'):
+            self.assertIs(tag_copy2._datastore, tag_with_ds._datastore,
                         "Tag should share __datastore reference like Watch does")

    def test_watch_copy_performance(self):
@@ -161,11 +161,6 @@ def extract_UUID_from_client(client):

 def delete_all_watches(client=None):

-    # Change tracking
-    client.application.config.get('DATASTORE')._dirty_watches = set()      # Watch UUIDs that need saving
-    client.application.config.get('DATASTORE')._dirty_settings = False     # Settings changed
-    client.application.config.get('DATASTORE')._watch_hashes = {}          # UUID -> SHA256 hash for change detection
-
    uuids = list(client.application.config.get('DATASTORE').data['watching'])
    for uuid in uuids:
        client.application.config.get('DATASTORE').delete(uuid)
@@ -28,7 +28,7 @@ info:
    
    For example: `x-api-key: YOUR_API_KEY`
    
-  version: 0.1.4
+  version: 0.1.5
  contact:
    name: ChangeDetection.io
    url: https://github.com/dgtlmoon/changedetection.io
@@ -1503,46 +1503,92 @@ paths:
    post:
      operationId: importWatches
      tags: [Import]
-      summary: Import watch URLs
-      description: Import a list of URLs to monitor. Accepts line-separated URLs in request body.
+      summary: Import watch URLs with configuration
+      description: |
+        Import a list of URLs to monitor with optional watch configuration. Accepts line-separated URLs in request body.
+
+        **Configuration via Query Parameters:**
+
+        You can pass ANY watch configuration field as query parameters to apply settings to all imported watches.
+        All parameters from the Watch schema are supported (processor, fetch_backend, notification_urls, etc.).
+
+        **Special Parameters:**
+        - `tag` / `tag_uuids` - Assign tags to imported watches
+        - `proxy` - Use specific proxy for imported watches
+        - `dedupe` - Skip duplicate URLs (default: true)
+
+        **Type Conversion:**
+        - Booleans: `true`, `false`, `1`, `0`, `yes`, `no`
+        - Arrays: Comma-separated (`item1,item2`) or JSON format (`["item1","item2"]`)
+        - Objects: JSON format (`{"key":"value"}`)
+        - Numbers: Parsed as int or float
      x-code-samples:
        - lang: 'curl'
          source: |
+            # Basic import
            curl -X POST "http://localhost:5000/api/v1/import" \
              -H "x-api-key: YOUR_API_KEY" \
              -H "Content-Type: text/plain" \
              -d $'https://example.com\nhttps://example.org\nhttps://example.net'
+
+            # Import with processor and fetch backend
+            curl -X POST "http://localhost:5000/api/v1/import?processor=restock_diff&fetch_backend=html_webdriver" \
+              -H "x-api-key: YOUR_API_KEY" \
+              -H "Content-Type: text/plain" \
+              -d $'https://example.com\nhttps://example.org'
+
+            # Import with multiple settings
+            curl -X POST "http://localhost:5000/api/v1/import?processor=restock_diff&paused=true&tag=production" \
+              -H "x-api-key: YOUR_API_KEY" \
+              -H "Content-Type: text/plain" \
+              -d $'https://example.com'
        - lang: 'Python'
          source: |
            import requests
-            
+
            headers = {
                'x-api-key': 'YOUR_API_KEY',
                'Content-Type': 'text/plain'
            }
+
+            # Basic import
            urls = 'https://example.com\nhttps://example.org\nhttps://example.net'
-            response = requests.post('http://localhost:5000/api/v1/import', 
+            response = requests.post('http://localhost:5000/api/v1/import',
                                   headers=headers, data=urls)
            print(response.json())
+
+            # Import with configuration
+            params = {
+                'processor': 'restock_diff',
+                'fetch_backend': 'html_webdriver',
+                'paused': 'false',
+                'tag': 'production'
+            }
+            response = requests.post('http://localhost:5000/api/v1/import',
+                                   headers=headers, params=params, data=urls)
+            print(response.json())
      parameters:
        - name: tag_uuids
          in: query
-          description: Tag UUID to apply to imported web page change monitors (watches)
+          description: Tag UUID(s) to apply to imported watches (comma-separated for multiple)
          schema:
            type: string
+          example: "550e8400-e29b-41d4-a716-446655440000"
        - name: tag
          in: query
-          description: Tag name to apply to imported web page change monitors (watches)
+          description: Tag name to apply to imported watches
          schema:
            type: string
+          example: "production"
        - name: proxy
          in: query
-          description: Proxy key to use for imported web page change monitors (watches)
+          description: Proxy key to use for imported watches
          schema:
            type: string
+          example: "proxy1"
        - name: dedupe
          in: query
-          description: Remove duplicate URLs (default true)
+          description: Skip duplicate URLs (default true)
          schema:
            type: boolean
            default: true
@@ -91,7 +91,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"

 # playwright is installed at Dockerfile build time because it's not available on all platforms

-pyppeteer-ng==2.0.0rc12
+pyppeteer-ng==2.0.0rc13
 pyppeteerstealth>=0.0.4

 # Include pytest, so if there's a support issue we can ask them to run these tests on their setup
@@ -148,7 +148,7 @@ tzdata
 pluggy ~= 1.6

 # Needed for testing, cross-platform for process and system monitoring
-psutil==7.2.1
+psutil==7.2.2

 ruff >= 0.11.2
 pre_commit >= 4.2.0
Author	SHA1	Message	Date
dgtlmoon	9548f5bd8f	Also URL addition in upgrade	2026-02-11 16:03:23 +01:00
dgtlmoon	5718280518	Use detactedh sha instead	2026-02-11 15:56:56 +01:00
dgtlmoon	b24ae45860	extra test	2026-02-11 15:53:01 +01:00
dgtlmoon	0e4e1cf65e	Correct test of init	2026-02-11 15:47:03 +01:00
dgtlmoon	d810dc38f4	deep fetch	2026-02-11 15:37:25 +01:00
dgtlmoon	c1e9e012e3	upgrade path check	2026-02-11 15:34:18 +01:00
dgtlmoon	5c29f1cee8	Adding test step for upgrades	2026-02-11 15:33:07 +01:00
dgtlmoon	a0b8d8e3ca	Better to quit	2026-02-11 15:17:19 +01:00
dgtlmoon	1942d42b06	Refactoring upgrade path	2026-02-11 15:13:23 +01:00
dgtlmoon	5726c5a0ac	API - Import use background task to import large lists (#3858 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-11 08:15:58 +01:00
dgtlmoon	80f7decf4f	API - Bumping docs	2026-02-11 07:44:45 +01:00
dgtlmoon	c66a29b011	API - Import - Ability to set any watch value as HTTP URL Query value, for example ?processor=restock_diff&time_between_check={'hours':24} Re #3845 (#3857 )	2026-02-11 07:26:48 +01:00
dgtlmoon	a1a2e5c5bf	API - Include missing `tags` in fetching watch information. #3854 (#3856 )	2026-02-11 06:45:19 +01:00
dgtlmoon	6e90a0bbd1	UI - Bulk checkbox operations modal confirmation fix Re #3853	2026-02-11 06:29:59 +01:00
dgtlmoon	987789425d	Tags update fix (#3849 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-07 17:13:41 +01:00
dgtlmoon	892b645147	Refactor for Tags storage (#3848 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-07 13:13:02 +01:00
dgtlmoon	278da3fa9b	Including uptime in UI settings/info Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-07 03:50:49 +01:00
dgtlmoon	c577bd700c	Refactor watch saving backend, closes #3846 (#3847 )	2026-02-07 03:41:35 +01:00
dependabot[bot]	d4d6bb2872	Bump psutil from 7.2.1 to 7.2.2 (#3844 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details	2026-02-06 19:55:04 +01:00
dependabot[bot]	45fb262386	Bump pyppeteer-ng from 2.0.0rc12 to 2.0.0rc13 (#3843 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-06 01:33:10 +01:00
dgtlmoon	1058debc12	Fix for When MoreThanOnePriceFound() is raised, plugins dont fire #3840 #3833 Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-05 20:07:47 +01:00
dgtlmoon	61b41b0b16	Rebuild translations (#3842 )	2026-02-05 18:17:46 +01:00