WIP

Minor cache
2026-04-05 18:47:58 +00:00 · 2026-02-13 15:44:54 +01:00 · 2026-02-13 15:15:34 +01:00 · 2026-02-13 14:58:23 +01:00 · 2026-02-13 14:54:34 +01:00 · 2026-02-13 14:52:34 +01:00
24 changed files with 2090 additions and 596 deletions
--- a/.github/workflows/test-stack-reusable-workflow.yml
+++ b/.github/workflows/test-stack-reusable-workflow.yml
@@ -103,7 +103,7 @@ jobs:
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
-          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
+          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'         
  # Basic pytest tests with ancillary services
  basic-tests:
@@ -516,3 +516,142 @@ jobs:
            exit 1
          fi
          docker rm sig-test
  # Upgrade path test
  upgrade-path-test:
    # Creates a datastore with release 0.49.1, then loads it with the commit
    # under test and verifies that the tag and the watch created beforehand
    # are still present in the UI.
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 25
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0  # Fetch all history and tags for upgrade testing

      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Check upgrade works without error
        env:
          # Handed over as environment variables rather than being expanded
          # into the script text: a ref name is user-controlled and could
          # otherwise inject shell commands.
          REF_NAME: ${{ github.ref_name }}
          COMMIT_SHA: ${{ github.sha }}
        run: |
          echo "=== Testing upgrade path from 0.49.1 to ${REF_NAME} (${COMMIT_SHA}) ==="

          # Print the fetched UI and the app log, stop the app and fail the step
          fail_ui_check() {
            echo "ERROR: $1"
            echo "=== UI Output ==="
            cat /tmp/ui-output.html
            echo "=== App Log ==="
            cat /tmp/ui-test.log
            kill "$APP_PID" || true
            exit 1
          }

          # Checkout old version and create datastore
          git checkout 0.49.1
          python3 -m venv .venv
          source .venv/bin/activate
          pip install -r requirements.txt
          pip install 'pyOpenSSL>=23.2.0'

          echo "=== Running version 0.49.1 to create datastore ==="
          python3 ./changedetection.py -C -d /tmp/data &
          APP_PID=$!

          # Wait for app to be ready
          echo "Waiting for 0.49.1 to be ready..."
          sleep 6

          # Extract API key from datastore (0.49.1 uses url-watches.json).
          # Retry briefly: the file may not be written yet, and a failing jq
          # would otherwise abort the step without a useful message.
          API_KEY=""
          for _ in $(seq 1 10); do
            API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json 2>/dev/null || true)
            if [ -n "${API_KEY}" ]; then
              break
            fi
            sleep 1
          done
          if [ -z "${API_KEY}" ]; then
            echo "ERROR: No API key found in /tmp/data/url-watches.json"
            kill "$APP_PID" || true
            exit 1
          fi
          echo "API Key: ${API_KEY:0:8}..."

          # Create a watch with tag "github-group-test" via API
          echo "Creating test watch with tag via API..."
          curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
            -H "x-api-key: ${API_KEY}" \
            -H "Content-Type: application/json" \
            --show-error --fail \
            --retry 6 --retry-delay 1 --retry-connrefused \
            -d '{
              "url": "https://example.com/upgrade-test",
              "tag": "github-group-test"
            }'
          echo "✓ Created watch with tag 'github-group-test'"

          # Create a specific test URL watch
          echo "Creating test URL watch via API..."
          curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
            -H "x-api-key: ${API_KEY}" \
            -H "Content-Type: application/json" \
            --show-error --fail \
            -d '{
              "url": "http://localhost/test.txt"
            }'
          echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1"

          # Stop the old version gracefully
          kill "$APP_PID"
          wait "$APP_PID" || true
          echo "✓ Version 0.49.1 stopped"

          # Upgrade to current version (use commit SHA since we're in detached HEAD)
          echo "Upgrading to commit ${COMMIT_SHA}"
          git checkout "${COMMIT_SHA}"
          pip install -r requirements.txt

          echo "=== Running current version (commit ${COMMIT_SHA}) with old datastore (testing mode) ==="
          # Run inside `if` so a non-zero exit still prints the log before failing
          if ! TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1; then
            echo "ERROR: Current version failed to load the 0.49.1 datastore"
            echo "=== Upgrade test output ==="
            cat /tmp/upgrade-test.log
            exit 1
          fi
          echo "=== Upgrade test output ==="
          cat /tmp/upgrade-test.log
          echo "✓ Datastore upgraded successfully"

          # Now start the current version normally to verify the tag survived
          echo "=== Starting current version to verify tag exists after upgrade ==="
          timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
          APP_PID=$!

          # Wait for app to be ready and fetch UI
          echo "Waiting for current version to be ready..."
          sleep 5
          curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html

          # Verify tag exists in UI
          if grep -qF "github-group-test" /tmp/ui-output.html; then
            echo "✓ Tag 'github-group-test' found in UI after upgrade"
          else
            fail_ui_check "Tag 'github-group-test' not found in UI after upgrade"
          fi

          # Verify test URL exists in UI (-F: match the URL literally, not as a regex)
          if grep -qF "http://localhost/test.txt" /tmp/ui-output.html; then
            echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade"
          else
            fail_ui_check "Watch URL 'http://localhost/test.txt' not found in UI after upgrade"
          fi

          # Cleanup
          kill "$APP_PID" || true
          wait "$APP_PID" || true

          echo ""
          echo "✓✓✓ Upgrade test passed: 0.49.1 → ${REF_NAME} ✓✓✓"
          echo "    - Commit: ${COMMIT_SHA}"
          echo "    - Datastore migrated successfully"
          echo "    - Tag 'github-group-test' survived upgrade"
          echo "    - Watch URL 'http://localhost/test.txt' survived upgrade"

      - name: Upload upgrade test logs
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
          # Keep the UI-phase log as well; it is what explains a failed tag/URL check
          path: |
            /tmp/upgrade-test.log
            /tmp/ui-test.log
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -371,7 +371,15 @@ def main():
        # Don't start if the JSON DB looks corrupt
        logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
        logger.critical(str(e))
-        return
+        sys.exit(1)
    # Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests)
    if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'):
        logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}")
        logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}")
        logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches")
        logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)")
        sys.exit(0)
    # Apply all_paused setting if specified via CLI
    if all_paused is not None:
--- a/changedetectionio/api/Import.py
+++ b/changedetectionio/api/Import.py
@@ -2,7 +2,7 @@ from changedetectionio.strtobool import strtobool
 from flask_restful import abort, Resource
 from flask import request
 from functools import wraps
-from . import auth, validate_openapi_request, schema_create_watch
+from . import auth, validate_openapi_request
 from ..validate_url import is_safe_valid_url
 import json
@@ -33,9 +33,25 @@ def convert_query_param_to_type(value, schema_property):
    Returns:
        Converted value in the appropriate type
    Supports both OpenAPI 3.1 formats:
    - type: [string, 'null']  (array format)
    - anyOf: [{type: string}, {type: null}]  (anyOf format)
    """
-    # Handle anyOf schemas (extract the first type)
+    prop_type = schema_property.get('type')
-    if 'anyOf' in schema_property:
+
    # Handle OpenAPI 3.1 type arrays: type: [string, 'null']
    if isinstance(prop_type, list):
        # Use the first non-null type from the array
        for t in prop_type:
            if t != 'null':
                prop_type = t
                break
        else:
            prop_type = None
    # Handle anyOf schemas (older format)
    elif 'anyOf' in schema_property:
        # Use the first non-null type from anyOf
        for option in schema_property['anyOf']:
            if option.get('type') and option.get('type') != 'null':
@@ -43,8 +59,6 @@ def convert_query_param_to_type(value, schema_property):
                break
        else:
            prop_type = None
    else:
        prop_type = schema_property.get('type')
    # Handle array type (e.g., notification_urls)
    if prop_type == 'array':
@@ -89,7 +103,7 @@ class Import(Resource):
    @validate_openapi_request('importWatches')
    def post(self):
        """Import a list of watched URLs with optional watch configuration."""
-
+        from . import get_watch_schema_properties
        # Special parameters that are NOT watch configuration
        special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}
@@ -115,7 +129,8 @@ class Import(Resource):
            tag_uuids = tag_uuids.split(',')
        # Extract ALL other query parameters as watch configuration
-        schema_properties = schema_create_watch.get('properties', {})
+        # Get schema from OpenAPI spec (replaces old schema_create_watch)
        schema_properties = get_watch_schema_properties()
        for param_name, param_value in request.args.items():
            # Skip special parameters
            if param_name in special_params:
--- a/changedetectionio/api/Notifications.py
+++ b/changedetectionio/api/Notifications.py
@@ -1,8 +1,6 @@
 from flask_expects_json import expects_json
 from flask_restful import Resource, abort
 from flask import request
 from . import auth, validate_openapi_request
 from . import schema_create_notification_urls, schema_delete_notification_urls
 class Notifications(Resource):
    def __init__(self, **kwargs):
@@ -22,7 +20,6 @@ class Notifications(Resource):
    @auth.check_token
    @validate_openapi_request('addNotifications')
    @expects_json(schema_create_notification_urls)
    def post(self):
        """Create Notification URLs."""
@@ -50,7 +47,6 @@ class Notifications(Resource):
    @auth.check_token
    @validate_openapi_request('replaceNotifications')
    @expects_json(schema_create_notification_urls)
    def put(self):
        """Replace Notification URLs."""
        json_data = request.get_json()
@@ -73,7 +69,6 @@ class Notifications(Resource):
    @auth.check_token
    @validate_openapi_request('deleteNotifications')
    @expects_json(schema_delete_notification_urls)
    def delete(self):
        """Delete Notification URLs."""
--- a/changedetectionio/api/Tags.py
+++ b/changedetectionio/api/Tags.py
@@ -1,6 +1,5 @@
 from changedetectionio import queuedWatchMetaData
 from changedetectionio import worker_pool
 from flask_expects_json import expects_json
 from flask_restful import abort, Resource
 from loguru import logger
@@ -8,8 +7,7 @@ import threading
 from flask import request
 from . import auth
-# Import schemas from __init__.py
+from . import validate_openapi_request
 from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request
 class Tag(Resource):
@@ -69,7 +67,25 @@ class Tag(Resource):
            tag.commit()
            return "OK", 200
-        return tag
+        # Filter out Watch-specific runtime fields that don't apply to Tags (yet)
        # TODO: Future enhancement - aggregate these values from all Watches that have this tag:
        #   - check_count: sum of all watches' check_count
        #   - last_checked: most recent last_checked from all watches
        #   - last_changed: most recent last_changed from all watches
        #   - consecutive_filter_failures: count of watches with failures
        #   - etc.
        # These come from watch_base inheritance but currently have no meaningful value for Tags
        watch_only_fields = {
            'browser_steps_last_error_step', 'check_count', 'consecutive_filter_failures',
            'content-type', 'fetch_time', 'last_changed', 'last_checked', 'last_error',
            'last_notification_error', 'last_viewed', 'notification_alert_count',
            'page_title', 'previous_md5', 'previous_md5_before_filters', 'remote_server_reply'
        }
        # Create clean tag dict without Watch-specific fields
        clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}
        return clean_tag
    @auth.check_token
    @validate_openapi_request('deleteTag')
@@ -102,24 +118,46 @@ class Tag(Resource):
    @auth.check_token
    @validate_openapi_request('updateTag')
    @expects_json(schema_update_tag)
    def put(self, uuid):
        """Update tag information."""
        tag = self.datastore.data['settings']['application']['tags'].get(uuid)
        if not tag:
            abort(404, message='No tag exists with the UUID of {}'.format(uuid))
        # Make a mutable copy of request.json for modification
        json_data = dict(request.json)
        # Validate notification_urls if provided
-        if 'notification_urls' in request.json:
+        if 'notification_urls' in json_data:
            from wtforms import ValidationError
            from changedetectionio.api.Notifications import validate_notification_urls
            try:
-                notification_urls = request.json.get('notification_urls', [])
+                notification_urls = json_data.get('notification_urls', [])
                validate_notification_urls(notification_urls)
            except ValidationError as e:
                return str(e), 400
-        tag.update(request.json)
+        # Filter out readOnly fields (extracted from OpenAPI spec Tag schema)
        # These are system-managed fields that should never be user-settable
        from . import get_readonly_tag_fields
        readonly_fields = get_readonly_tag_fields()
        # Tag model inherits from watch_base but has no @property attributes of its own
        # So we only need to filter readOnly fields
        for field in readonly_fields:
            json_data.pop(field, None)
        # Validate remaining fields - reject truly unknown fields
        # Get valid fields from Tag schema
        from . import get_tag_schema_properties
        valid_fields = set(get_tag_schema_properties().keys())
        # Check for unknown fields
        unknown_fields = set(json_data.keys()) - valid_fields
        if unknown_fields:
            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
        tag.update(json_data)
        tag.commit()
        return "OK", 200
@@ -127,13 +165,21 @@ class Tag(Resource):
    @auth.check_token
    @validate_openapi_request('createTag')
    # Only cares for {'title': 'xxxx'}
    def post(self):
        """Create a single tag/group."""
        json_data = request.get_json()
        title = json_data.get("title",'').strip()
        # Validate that only valid fields are provided
        # Get valid fields from Tag schema
        from . import get_tag_schema_properties
        valid_fields = set(get_tag_schema_properties().keys())
        # Check for unknown fields
        unknown_fields = set(json_data.keys()) - valid_fields
        if unknown_fields:
            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
        new_uuid = self.datastore.add_tag(title=title)
        if new_uuid:
--- a/changedetectionio/api/Watch.py
+++ b/changedetectionio/api/Watch.py
@@ -8,13 +8,11 @@ from . import auth
 from changedetectionio import queuedWatchMetaData, strtobool
 from changedetectionio import worker_pool
 from flask import request, make_response, send_from_directory
 from flask_expects_json import expects_json
 from flask_restful import abort, Resource
 from loguru import logger
 import copy
-# Import schemas from __init__.py
+from . import validate_openapi_request, get_readonly_watch_fields
 from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
 from ..notification import valid_notification_formats
 from ..notification.handler import newline_re
@@ -121,7 +119,6 @@ class Watch(Resource):
    @auth.check_token
    @validate_openapi_request('updateWatch')
    @expects_json(schema_update_watch)
    def put(self, uuid):
        """Update watch information."""
        watch = self.datastore.data['watching'].get(uuid)
@@ -175,6 +172,35 @@ class Watch(Resource):
        # Extract and remove processor config fields from json_data
        processor_config_data = processors.extract_processor_config_from_form_data(json_data)
        # Filter out readOnly fields (extracted from OpenAPI spec Watch schema)
        # These are system-managed fields that should never be user-settable
        readonly_fields = get_readonly_watch_fields()
        # Also filter out @property attributes (computed/derived values from the model)
        # These are not stored and should be ignored in PUT requests
        from changedetectionio.model.Watch import model as WatchModel
        property_fields = WatchModel.get_property_names()
        # Combine both sets of fields to ignore
        fields_to_ignore = readonly_fields | property_fields
        # Remove all ignored fields from update data
        for field in fields_to_ignore:
            json_data.pop(field, None)
        # Validate remaining fields - reject truly unknown fields
        # Get valid fields from WatchBase schema
        from . import get_watch_schema_properties
        valid_fields = set(get_watch_schema_properties().keys())
        # Also allow last_viewed (explicitly defined in UpdateWatch schema)
        valid_fields.add('last_viewed')
        # Check for unknown fields
        unknown_fields = set(json_data.keys()) - valid_fields
        if unknown_fields:
            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
        # Update watch with regular (non-processor-config) fields
        watch.update(json_data)
        watch.commit()
@@ -393,7 +419,6 @@ class CreateWatch(Resource):
    @auth.check_token
    @validate_openapi_request('createWatch')
    @expects_json(schema_create_watch)
    def post(self):
        """Create a single watch."""
--- a/changedetectionio/api/init.py
+++ b/changedetectionio/api/init.py
@@ -1,41 +1,6 @@
 import copy
 import functools
 from flask import request, abort
 from loguru import logger
 from . import api_schema
 from ..model import watch_base
 # Build a JSON Schema at least partially based on our Watch model
 watch_base_config = watch_base()
 schema = api_schema.build_watch_json_schema(watch_base_config)
 schema_create_watch = copy.deepcopy(schema)
 schema_create_watch['required'] = ['url']
 del schema_create_watch['properties']['last_viewed']
 # Allow processor_config_* fields (handled separately in endpoint)
 schema_create_watch['patternProperties'] = {
    '^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
 }
 schema_update_watch = copy.deepcopy(schema)
 schema_update_watch['additionalProperties'] = False
 # Allow processor_config_* fields (handled separately in endpoint)
 schema_update_watch['patternProperties'] = {
    '^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
 }
 # Tag schema is also based on watch_base since Tag inherits from it
 schema_tag = copy.deepcopy(schema)
 schema_create_tag = copy.deepcopy(schema_tag)
 schema_create_tag['required'] = ['title']
 schema_update_tag = copy.deepcopy(schema_tag)
 schema_update_tag['additionalProperties'] = False
 schema_notification_urls = copy.deepcopy(schema)
 schema_create_notification_urls = copy.deepcopy(schema_notification_urls)
 schema_create_notification_urls['required'] = ['notification_urls']
 schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
 schema_delete_notification_urls['required'] = ['notification_urls']
@functools.cache
 def get_openapi_spec():
@@ -54,6 +19,134 @@ def get_openapi_spec():
    _openapi_spec = OpenAPI.from_dict(spec_dict)
    return _openapi_spec
@functools.cache
def get_openapi_schema_dict():
    """
    Load docs/api-spec.yaml and return it as a plain dictionary.

    The spec is looked up two directories above this module first; when that
    path does not exist, the location one directory up is opened instead.
    The result is the parsed YAML itself (not an OpenAPI object). Because of
    functools.cache the file is read once and every caller receives the same
    dict object.
    """
    import os
    import yaml

    module_dir = os.path.dirname(__file__)
    spec_file = os.path.join(module_dir, '../../docs/api-spec.yaml')
    if not os.path.exists(spec_file):
        # Fallback location; not checked for existence, open() will raise if absent
        spec_file = os.path.join(module_dir, '../docs/api-spec.yaml')

    with open(spec_file, 'r', encoding='utf-8') as handle:
        parsed_spec = yaml.safe_load(handle)
    return parsed_spec
@functools.cache
def _resolve_schema_properties(schema_name):
    """
    Collect the properties of a named schema from components/schemas,
    following one level of allOf inheritance.

    Args:
        schema_name: Name of the schema (e.g., 'WatchBase', 'Watch', 'Tag')

    Returns:
        dict: A new dict mapping property name to its definition. For an
        allOf schema, entries are merged in list order, so a later entry
        overrides an earlier one with the same name. An unknown schema name
        yields an empty dict.

    Note:
        A $ref is resolved by its last path segment and only the referenced
        schema's own 'properties' are read; an allOf inside the referenced
        schema is not followed.
    """
    all_schemas = get_openapi_schema_dict()['components']['schemas']
    schema = all_schemas.get(schema_name, {})

    if 'allOf' not in schema:
        # Direct properties (no inheritance). Return a copy: handing out the
        # spec's own dict would let a caller mutate the cached OpenAPI spec.
        return dict(schema.get('properties', {}))

    properties = {}
    for item in schema['allOf']:
        # Resolve $ref to parent schema
        if '$ref' in item:
            parent_name = item['$ref'].split('/')[-1]
            parent_schema = all_schemas.get(parent_name, {})
            properties.update(parent_schema.get('properties', {}))
        # Add schema-specific properties
        if 'properties' in item:
            properties.update(item['properties'])
    return properties
@functools.cache
def _resolve_readonly_fields(schema_name):
    """
    Gather the names of every property marked `readOnly: true` in a schema,
    following one level of allOf inheritance.

    Args:
        schema_name: Name of the schema (e.g., 'Watch', 'Tag')

    Returns:
        frozenset: Names of the readOnly properties found either on the
        schema itself or, for allOf schemas, on each $ref'd schema and each
        inline 'properties' section.
    """
    all_schemas = get_openapi_schema_dict()['components']['schemas']
    target = all_schemas.get(schema_name, {})

    def _marked_readonly(holder):
        # Names under holder['properties'] whose definition has readOnly exactly True
        if 'properties' not in holder:
            return set()
        return {
            name
            for name, definition in holder['properties'].items()
            if definition.get('readOnly') is True
        }

    if 'allOf' not in target:
        # Direct properties (no inheritance)
        return frozenset(_marked_readonly(target))

    collected = set()
    for entry in target['allOf']:
        if '$ref' in entry:
            # The referenced schema is identified by the last segment of the $ref path
            referenced = all_schemas.get(entry['$ref'].split('/')[-1], {})
            collected |= _marked_readonly(referenced)
        # Properties declared inline on this allOf entry
        collected |= _marked_readonly(entry)
    return frozenset(collected)
@functools.cache
def get_watch_schema_properties():
    """
    Return the property definitions of the 'WatchBase' schema from the
    OpenAPI spec, keyed by property name.
    """
    watch_base_properties = _resolve_schema_properties('WatchBase')
    return watch_base_properties
@functools.cache
def get_readonly_watch_fields():
    """
    Return the frozenset of property names flagged readOnly on the 'Watch'
    schema of the OpenAPI spec, including those inherited through allOf.
    """
    readonly_names = _resolve_readonly_fields('Watch')
    return readonly_names
@functools.cache
def get_tag_schema_properties():
    """
    Return the property definitions of the 'Tag' schema from the OpenAPI
    spec, keyed by property name, including those inherited through allOf.
    """
    tag_properties = _resolve_schema_properties('Tag')
    return tag_properties
@functools.cache
def get_readonly_tag_fields():
    """
    Return the frozenset of property names flagged readOnly on the 'Tag'
    schema of the OpenAPI spec, including those inherited through allOf.
    """
    readonly_names = _resolve_readonly_fields('Tag')
    return readonly_names
 def validate_openapi_request(operation_id):
    """Decorator to validate incoming requests against OpenAPI spec."""
    def decorator(f):
@@ -72,8 +165,16 @@ def validate_openapi_request(operation_id):
                    if result.errors:
                        error_details = []
                        for error in result.errors:
-                            error_details.append(str(error))
+                            # Extract detailed schema errors from __cause__
-                        raise BadRequest(f"OpenAPI validation failed: {error_details}")
+                            if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
                                for schema_error in error.__cause__.schema_errors:
                                    field = '.'.join(str(p) for p in schema_error.path) if schema_error.path else 'body'
                                    msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
                                    error_details.append(f"{field}: {msg}")
                            else:
                                error_details.append(str(error))
                            logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
                        raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
            except BadRequest:
                # Re-raise BadRequest exceptions (validation failures)
                raise
--- a/changedetectionio/api/api_schema.py
+++ b/changedetectionio/api/api_schema.py
@@ -1,162 +0,0 @@
 # Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
 # Probably other ways to solve this when the backend switches to some ORM
 from changedetectionio.notification import valid_notification_formats
 def build_time_between_check_json_schema():
     """
     Build the JSON Schema fragment for the `time_between_check` object.

     Returns:
         dict: An object schema that forbids additional properties and
         declares weeks/days/hours/minutes/seconds, each accepting an
         integer or null.
     """
     time_units = ('weeks', 'days', 'hours', 'minutes', 'seconds')
     # A fresh anyOf structure is created per unit so the entries share no state
     unit_schemas = {
         unit: {"anyOf": [{"type": "integer"}, {"type": "null"}]}
         for unit in time_units
     }
     return {
         "type": "object",
         "additionalProperties": False,
         "properties": unit_schemas,
     }
 def build_watch_json_schema(d):
    """
    Derive a JSON Schema (as a dict) from a mapping of default values.

    Each key of `d` whose default is None, a list, a bool or a str becomes a
    schema property with a matching `anyOf` entry; keys with any other value
    type get no property from the loop. A series of hand-written overrides
    follows, and their order matters because later assignments replace
    earlier ones.

    Args:
        d: Mapping of field name to default value; only `.items()` is used.
           Presumably the watch_base defaults - confirm against the caller.

    Returns:
        dict: {'type': 'object', 'properties': {...}}.

    Raises:
        KeyError: if `d` lacks one of the keys that the overrides below index
        or delete directly, or that key's default was not None/list/bool/str
        where an 'anyOf' list is expected.
    """
    # Base JSON schema
    schema = {
        'type': 'object',
        'properties': {},
    }
    # First pass: infer a one-entry anyOf from the Python type of each default
    for k, v in d.items():
        # @todo 'integer' is not covered here because it's almost always for internal usage
        if isinstance(v, type(None)):
            schema['properties'][k] = {
                "anyOf": [
                    {"type": "null"},
                ]
            }
        elif isinstance(v, list):
            schema['properties'][k] = {
                "anyOf": [
                    {"type": "array",
                     # Always is an array of strings, like text or regex or something
                     "items": {
                         "type": "string",
                         "maxLength": 5000
                     }
                     },
                ]
            }
        elif isinstance(v, bool):
            schema['properties'][k] = {
                "anyOf": [
                    {"type": "boolean"},
                ]
            }
        elif isinstance(v, str):
            schema['properties'][k] = {
                "anyOf": [
                    {"type": "string",
                     "maxLength": 5000},
                ]
            }
    # Can also be a string (or None by default above)
    # Each of these keys must already have an 'anyOf' list from the loop above
    for v in ['body',
              'notification_body',
              'notification_format',
              'notification_title',
              'proxy',
              'tag',
              'title',
              'webdriver_js_execute_code'
              ]:
        schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000})
    # last_viewed is defined from scratch rather than inferred from its default
    for v in ['last_viewed']:
        schema['properties'][v] = {
            "type": "integer",
            "description": "Unix timestamp in seconds of the last time the watch was viewed.",
            "minimum": 0
        }
    # None or Boolean
    schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})
    # Replaces whatever the loop inferred for 'method' with a fixed enum
    schema['properties']['method'] = {"type": "string",
                                      "enum": ["GET", "POST", "DELETE", "PUT"]
                                      }
    schema['properties']['fetch_backend']['anyOf'].append({"type": "string",
                                                           "enum": ["html_requests", "html_webdriver"]
                                                           })
    schema['properties']['processor'] = {"anyOf": [
        {"type": "string", "enum": ["restock_diff", "text_json_diff"]},
        {"type": "null"}
    ]}
    # All headers must be key/value type dict
    schema['properties']['headers'] = {
        "type": "object",
        "patternProperties": {
            # Should always be a string:string type value
            ".*": {"type": "string"},
        }
    }
    # Overwrites the anyOf built (and extended) for notification_format earlier
    schema['properties']['notification_format'] = {'type': 'string',
                                                   'enum': list(valid_notification_formats.keys())
                                                   }
    # Stuff that shouldn't be available but is just state-storage
    # `del` raises KeyError when one of these was never added by the loop
    for v in ['previous_md5', 'last_error', 'has_ldjson_price_data', 'previous_md5_before_filters', 'uuid']:
        del schema['properties'][v]
    schema['properties']['webdriver_delay']['anyOf'].append({'type': 'integer'})
    schema['properties']['time_between_check'] = build_time_between_check_json_schema()
    schema['properties']['time_between_check_use_default'] = {
        "type": "boolean",
        "default": True,
        "description": "Whether to use global settings for time between checks - defaults to true if not set"
    }
    # browser_steps: a list of {operation, selector, optional_value} objects, null, or []
    schema['properties']['browser_steps'] = {
        "anyOf": [
            {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "operation": {
                            "type": ["string", "null"],
                            "maxLength": 5000  # Allows null and any string up to 5000 chars (including "")
                        },
                        "selector": {
                            "type": ["string", "null"],
                            "maxLength": 5000
                        },
                        "optional_value": {
                            "type": ["string", "null"],
                            "maxLength": 5000
                        }
                    },
                    "required": ["operation", "selector", "optional_value"],
                    "additionalProperties": False  # No extra keys allowed
                }
            },
            {"type": "null"},  # Allows null for `browser_steps`
            {"type": "array", "maxItems": 0}  # Allows empty array []
        ]
    }
    # headers ?
    return schema
--- a/changedetectionio/model/Tag.py
+++ b/changedetectionio/model/Tag.py
@@ -20,11 +20,9 @@ See: Watch.py model docstring for full Pydantic architecture explanation
 See: processors/restock_diff/processor.py:184-192 for current manual implementation
 """
 import os
 from changedetectionio.model import watch_base
 from changedetectionio.model.persistence import EntityPersistenceMixin
 class model(EntityPersistenceMixin, watch_base):
    """
    Tag domain model - groups watches and can override their settings.
--- a/changedetectionio/model/init.py
+++ b/changedetectionio/model/init.py
@@ -2,7 +2,7 @@ import os
 import uuid
 from changedetectionio import strtobool
-from .persistence import EntityPersistenceMixin
+from .persistence import EntityPersistenceMixin, _determine_entity_type
 __all__ = ['EntityPersistenceMixin', 'watch_base']
@@ -26,6 +26,7 @@ class watch_base(dict):
          - Configuration override chain resolution (Watch → Tag → Global)
          - Immutability options
          - Better testing
          - USE https://docs.pydantic.dev/latest/integrations/datamodel_code_generator TO BUILD THE MODEL FROM THE API-SPEC!!!
    CHAIN RESOLUTION ARCHITECTURE:
        The dream is a 3-level override hierarchy:
@@ -173,7 +174,7 @@ class watch_base(dict):
            'body': None,
            'browser_steps': [],
            'browser_steps_last_error_step': None,
-            'conditions' : {},
+            'conditions' : [],
            'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
            'check_count': 0,
            'check_unique_lines': False,  # On change-detected, compare against all history if its something new
@@ -299,6 +300,42 @@ class watch_base(dict):
        if self.get('default'):
            del self['default']
    @classmethod
    def get_property_names(cls):
        """
        Get all @property attribute names from this model class using introspection.

        This discovers computed/derived properties that are not stored in the datastore.
        These properties should be filtered out during PUT/POST requests.

        The result is computed once per class and cached on that class. Every class
        in the hierarchy keeps its own cache, so a subclass reports its own
        properties in addition to the inherited ones.

        Returns:
            frozenset: Immutable set of @property attribute names from the model class
        """
        import functools

        # Look in cls.__dict__ rather than using hasattr(): hasattr() also finds a
        # cache inherited from a parent class, which would make a subclass return
        # the parent's property set and miss the properties it defines itself.
        if '_cached_get_property_names' not in cls.__dict__:
            @functools.cache
            def _get_props():
                properties = set()
                # Use introspection to find all @property attributes
                for name in dir(cls):
                    # Skip private/magic attributes
                    if name.startswith('_'):
                        continue
                    try:
                        attr = getattr(cls, name)
                        # Check if it's a property descriptor
                        if isinstance(attr, property):
                            properties.add(name)
                    except (AttributeError, TypeError):
                        # Some descriptors refuse class-level access; ignore them
                        continue
                return frozenset(properties)

            cls._cached_get_property_names = _get_props

        return cls.__dict__['_cached_get_property_names']()
    def __deepcopy__(self, memo):
        """
        Custom deepcopy for all watch_base subclasses (Watch, Tag, etc.).
@@ -511,10 +548,8 @@ class watch_base(dict):
        # Save to disk via subclass implementation
        try:
            # Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
            from changedetectionio.model.persistence import _determine_entity_type
            entity_type = _determine_entity_type(self.__class__)
            filename = f"{entity_type}.json"
            self._save_to_disk(data_dict, uuid)
            logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
        except Exception as e:
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -56,6 +56,259 @@ def _deduplicate_prices(data):
    return list(unique_data)
 # =============================================================================
 # MEMORY MANAGEMENT: Why We Use Multiprocessing (Linux Only)
 # =============================================================================
 #
 # The get_itemprop_availability() function uses 'extruct' to parse HTML metadata
 # (JSON-LD, microdata, OpenGraph, etc). Extruct internally uses lxml, which wraps
 # libxml2 - a C library that allocates memory at the C level.
 #
 # Memory Leak Problem:
 # --------------------
 # 1. lxml's document_fromstring() creates thousands of Python objects backed by
 #    C-level allocations (nodes, attributes, text content)
 # 2. Python's garbage collector can mark these objects as collectible, but
 #    cannot force the OS to reclaim the actual C-level memory
 # 3. malloc/free typically doesn't return memory to OS - it just marks it as
 #    "free in the process address space"
 # 4. With repeated parsing of large HTML (5MB+ pages), memory accumulates even
 #    after Python GC runs
 #
 # Why Multiprocessing Fixes This:
 # --------------------------------
 # When a subprocess exits, the OS forcibly reclaims ALL memory including C-level
 # allocations that Python GC couldn't release. This ensures clean memory state
 # after each extraction.
 #
 # Performance Impact:
 # -------------------
 # - Memray analysis showed 1.2M document_fromstring allocations per page
 # - Without subprocess: memory grows by ~50-500MB per parse and lingers
 # - With subprocess: ~35MB overhead but forces full cleanup after each run
 # - Trade-off: 35MB resource_tracker vs 500MB+ accumulated leak = much better at scale
 #
 # References:
 # -----------
 # - lxml memory issues: https://medium.com/devopss-hole/python-lxml-memory-leak-b8d0b1000dc7
 # - libxml2 caching behavior: https://www.mail-archive.com/lxml@python.org/msg00026.html
 # - GC limitations with C extensions: https://benbernardblog.com/tracking-down-a-freaky-python-memory-leak-part-2/
 #
 # Additional Context:
 # -------------------
 # - jsonpath_ng (used to query the parsed data) is pure Python and doesn't leak
 # - The leak is specifically from lxml's document parsing, not the JSONPath queries
 # - Linux-only because multiprocessing spawn is well-tested there; other platforms
 #   use direct call as fallback
 #
 # Alternative Solution (Future Optimization):
 # -------------------------------------------
 # This entire problem could be avoided by using regex to extract just the machine
 # data blocks (JSON-LD, microdata, OpenGraph tags) BEFORE parsing with lxml:
 #
 #   1. Use regex to extract <script type="application/ld+json">...</script> blocks
 #   2. Use regex to extract <meta property="og:*"> tags
 #   3. Use regex to find itemprop/itemtype attributes and their containing elements
 #   4. Parse ONLY those extracted snippets instead of the entire HTML document
 #
 # Benefits:
 #   - Avoids parsing 5MB of HTML when we only need a few KB of metadata
 #   - Eliminates the lxml memory leak entirely
 #   - Faster extraction (regex is much faster than DOM parsing)
 #   - No subprocess overhead needed
 #
 # Trade-offs:
 #   - Regex for HTML is brittle (comments, CDATA, edge cases)
 #   - Microdata extraction would be complex (need to track element boundaries)
 #   - Would need extensive testing to ensure we don't miss valid data
 #   - extruct is battle-tested; regex solution would need similar maturity
 #
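 # Current approach: extract_itemprop_availability_safe() first tries a pure-Python
 # (html.parser based) extraction and only falls back to extruct when that does not
 # yield both price and availability. The fallback keeps the subprocess approach,
 # which is safer and leverages the existing extruct code.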
 # =============================================================================
 def _extract_itemprop_availability_worker(pipe_conn):
    """
    Child-process entry point that runs the extruct/lxml extraction.

    Protocol over ``pipe_conn`` (raw bytes in both directions, no pickle):
      1. receive the HTML document as UTF-8 bytes
      2. send back one UTF-8 encoded JSON object:
         - ``{'success': True, 'data': {...}}`` on success
         - ``{'success': False, 'exception_type': ..., ['exception_message': ...]}``
           when the extraction raised

    The connection is always closed before the function returns.

    Args:
        pipe_conn: Pipe connection to receive HTML and send result
    """
    import gc
    import json

    def _reply(payload):
        # Single place where a response is serialised and written to the pipe
        pipe_conn.send_bytes(json.dumps(payload).encode('utf-8'))

    html_content = None
    result_data = None
    try:
        # HTML arrives as raw bytes; drop the byte copy as soon as it is decoded
        raw = pipe_conn.recv_bytes()
        html_content = raw.decode('utf-8')
        del raw
        gc.collect()

        # The actual extruct/lxml work happens here, inside the subprocess
        result_data = get_itemprop_availability(html_content)

        # The Restock object is flattened to a plain dict so it can travel as JSON
        _reply({
            'success': True,
            'data': dict(result_data) if result_data else {}
        })

        # Release the large objects before the process winds down
        del result_data, html_content
        gc.collect()
    except MoreThanOnePriceFound:
        # Reported by name only so the parent can raise the same exception type
        _reply({
            'success': False,
            'exception_type': 'MoreThanOnePriceFound'
        })
    except Exception as e:
        # Anything else is reported with its type name and message
        _reply({
            'success': False,
            'exception_type': type(e).__name__,
            'exception_message': str(e)
        })
    finally:
        # Drop whatever references are still held (rebinding is safe even when
        # the names were already deleted above), then close our end of the pipe
        html_content = None
        result_data = None
        gc.collect()
        pipe_conn.close()
 def extract_itemprop_availability_safe(html_content) -> Restock:
    """
    Extract itemprop availability with hybrid approach for memory efficiency.

    Strategy (fastest to slowest, least to most memory):
    1. Try pure Python extraction (JSON-LD, OpenGraph, microdata); used as the
       result only when it yields BOTH a price and an availability.
    2. Otherwise fall back to extruct. On Linux this runs in a spawned
       subprocess so lxml's C-level allocations are returned to the OS when the
       worker exits; on other platforms extruct is called directly.

    If the subprocess machinery fails (cannot start, worker dies, malformed
    reply) or the worker reports an error other than MoreThanOnePriceFound,
    extruct is called directly in this process so the caller sees the original
    exception type.

    Args:
        html_content: HTML string to parse

    Returns:
        Restock: Extracted availability data

    Raises:
        MoreThanOnePriceFound: When multiple prices detected
        Other exceptions: From extruct/parsing
    """
    import platform

    # Step 1: Try pure Python extraction first (fast, no lxml, no memory leak)
    try:
        from .pure_python_extractor import extract_metadata_pure_python, query_price_availability
        logger.trace("Attempting pure Python metadata extraction (no lxml)")
        extracted_data = extract_metadata_pure_python(html_content)
        price_data = query_price_availability(extracted_data)
        # If we got price AND availability, we're done!
        if price_data.get('price') and price_data.get('availability'):
            result = Restock(price_data)
            logger.debug(f"Pure Python extraction successful: {dict(result)}")
            return result
        # If we got some data but not everything, still try extruct for completeness
        if price_data.get('price') or price_data.get('availability'):
            logger.debug(f"Pure Python extraction partial: {price_data}, will try extruct for completeness")
    except Exception as e:
        logger.debug(f"Pure Python extraction failed: {e}, falling back to extruct")

    # Step 2: Fall back to extruct (uses lxml, needs subprocess on Linux)
    logger.trace("Falling back to extruct (lxml-based) with subprocess isolation")

    # Only use subprocess isolation on Linux
    # Other platforms may have issues with spawn or don't need the aggressive memory management
    if platform.system() != 'Linux':
        return get_itemprop_availability(html_content)

    import json
    import multiprocessing

    reply = None
    worker = None
    worker_started = False
    parent_conn = None
    child_conn = None
    try:
        ctx = multiprocessing.get_context('spawn')
        parent_conn, child_conn = ctx.Pipe()
        worker = ctx.Process(target=_extract_itemprop_availability_worker, args=(child_conn,))
        worker.start()
        worker_started = True

        # The worker now owns its own handle to the child end. Close ours so that
        # a worker which dies without answering produces EOF on recv_bytes()
        # instead of blocking this process forever.
        child_conn.close()
        child_conn = None

        # Send HTML as raw bytes (no pickle), receive the result as JSON.
        # Temporaries are released by reference counting as soon as each call returns.
        parent_conn.send_bytes(html_content.encode('utf-8'))
        reply = json.loads(parent_conn.recv_bytes().decode('utf-8'))
    except Exception as e:
        # Only failures of the subprocess machinery itself end up here
        logger.warning(f"Subprocess extraction failed: {e}, falling back to direct call")
        reply = None
    finally:
        # Always release the pipe ends and reap the worker, also on errors.
        # Closing our end first guarantees a worker still waiting on the pipe
        # sees EOF and exits, so join() cannot wait on it indefinitely.
        for conn in (parent_conn, child_conn):
            if conn is not None:
                conn.close()
        if worker_started:
            worker.join()

    # The reply is interpreted OUTSIDE the try block above on purpose: an
    # exception reported by the worker must not be mistaken for a subprocess
    # failure and trigger a second, in-process extruct parse.
    if isinstance(reply, dict):
        if reply.get('success'):
            # Reconstruct Restock object from dict
            return Restock(reply.get('data') or {})

        exception_type = reply.get('exception_type')
        if exception_type == 'MoreThanOnePriceFound':
            raise MoreThanOnePriceFound()

        # Any other worker-side error: retry in-process so the caller receives
        # the original exception type rather than a generic one.
        exception_msg = reply.get('exception_message', '')
        logger.warning(f"Subprocess extraction failed: {exception_type}: {exception_msg}, falling back to direct call")

    return get_itemprop_availability(html_content)
 # should return Restock()
 # add casting?
 def get_itemprop_availability(html_content) -> Restock:
@@ -196,8 +449,9 @@ class perform_site_check(difference_detection_processor):
        multiple_prices_found = False
        # Try built-in extraction first, this will scan metadata in the HTML
        # On Linux, this runs in a subprocess to prevent lxml/extruct memory leaks
        try:
-            itemprop_availability = get_itemprop_availability(self.fetcher.content)
+            itemprop_availability = extract_itemprop_availability_safe(self.fetcher.content)
        except MoreThanOnePriceFound as e:
            # Don't raise immediately - let plugins try to handle this case
            # Plugins might be able to determine which price is correct
--- a/changedetectionio/processors/restock_diff/pure_python_extractor.py
+++ b/changedetectionio/processors/restock_diff/pure_python_extractor.py
@@ -0,0 +1,286 @@
 """
 Pure Python metadata extractor - no lxml, no memory leaks.
 This module provides a fast, memory-efficient alternative to extruct for common
 e-commerce metadata extraction. It handles:
 - JSON-LD (covers 80%+ of modern sites)
 - OpenGraph meta tags
 - Basic microdata attributes
 Uses Python's built-in html.parser instead of lxml/libxml2, avoiding C-level
 memory allocation issues. For edge cases, the main processor can fall back to
 extruct (with subprocess isolation on Linux).
 """
 from html.parser import HTMLParser
 import json
 import re
 from loguru import logger
 class JSONLDExtractor(HTMLParser):
    """
    Extract JSON-LD structured data from HTML.

    Finds all <script type="application/ld+json"> tags and parses their content.
    Handles multiple JSON-LD blocks on the same page; a block whose top level is
    a JSON array contributes each array element separately.

    The media type is matched case-insensitively and any parameters
    (e.g. ``application/ld+json; charset=utf-8``) are ignored.

    Attributes:
        data: list of all parsed JSON-LD values, in document order
    """
    JSONLD_MEDIA_TYPE = 'application/ld+json'

    def __init__(self):
        super().__init__()
        self.in_jsonld = False
        self.data = []  # List of all parsed JSON-LD objects
        self.current_script = []

    @classmethod
    def _is_jsonld_type(cls, value):
        """Return True when a script ``type`` attribute value denotes JSON-LD."""
        # html.parser reports attributes without a value as None
        if not value:
            return False
        # Drop media-type parameters and normalise case/whitespace before comparing
        return value.split(';', 1)[0].strip().lower() == cls.JSONLD_MEDIA_TYPE

    def handle_starttag(self, tag, attrs):
        if tag != 'script':
            return
        # Check if this is a JSON-LD script tag
        for attr, value in attrs:
            if attr == 'type' and self._is_jsonld_type(value):
                self.in_jsonld = True
                self.current_script = []
                break

    def handle_data(self, data):
        # Script bodies may be delivered in several chunks; collect them all
        if self.in_jsonld:
            self.current_script.append(data)

    def handle_endtag(self, tag):
        if tag == 'script' and self.in_jsonld:
            # Parse the accumulated script content
            script_content = ''.join(self.current_script)
            if script_content.strip():
                try:
                    # Parse JSON (handles both objects and arrays)
                    parsed = json.loads(script_content)
                    if isinstance(parsed, list):
                        self.data.extend(parsed)
                    else:
                        self.data.append(parsed)
                except json.JSONDecodeError as e:
                    # A broken block is skipped; other blocks are still collected
                    logger.debug(f"Failed to parse JSON-LD: {e}")
            self.in_jsonld = False
            self.current_script = []
 class OpenGraphExtractor(HTMLParser):
    """
    Extract OpenGraph meta tags from HTML.

    Finds <meta property="og:*"> tags commonly used for social media sharing.
    Tags with an empty or missing ``content`` are ignored; when a property
    occurs more than once the last non-empty value wins.

    Attributes:
        og_data: dict mapping the property name (e.g. ``og:title``) to its content
    """
    def __init__(self):
        super().__init__()
        self.og_data = {}

    def handle_starttag(self, tag, attrs):
        if tag != 'meta':
            return
        attrs_dict = dict(attrs)
        # html.parser reports a valueless attribute (<meta property>) as None,
        # so a plain .get(..., '') default is not enough to guarantee a string.
        prop = attrs_dict.get('property') or ''
        # Extract OpenGraph properties
        if prop.startswith('og:'):
            content = attrs_dict.get('content') or ''
            if content:
                self.og_data[prop] = content
 class MicrodataExtractor(HTMLParser):
    """
    Lightweight microdata reader for price, currency and availability.

    Only flat ``itemprop`` attributes are recognised; nested
    itemscope/itemtype hierarchies are not resolved - for complex cases,
    use extruct as fallback.

    Attributes:
        microdata: dict that may contain 'price', 'currency' and 'availability'
        current_itemprop: itemprop whose text content is awaited, or None
    """
    def __init__(self):
        super().__init__()
        self.microdata = {}
        self.current_itemprop = None

    def handle_starttag(self, tag, attrs):
        attributes = dict(attrs)
        if 'itemprop' not in attributes:
            return

        name = attributes['itemprop']
        if name in ('price', 'priceCurrency'):
            # Value is either in the content attribute or in the element text
            target = 'price' if name == 'price' else 'currency'
            if 'content' in attributes:
                self.microdata[target] = attributes['content']
            else:
                self.current_itemprop = name
        elif name == 'availability':
            # href (link element) takes precedence over content (meta element)
            for source in ('href', 'content'):
                if source in attributes:
                    self.microdata['availability'] = attributes[source]
                    break
            else:
                # Neither attribute present: wait for the text content
                self.current_itemprop = 'availability'

    def handle_data(self, data):
        awaiting = self.current_itemprop

        if awaiting == 'price':
            # Keep only digits and dots, then try to read the rest as a number
            digits = re.sub(r'[^\d.]', '', data.strip())
            if digits:
                try:
                    self.microdata['price'] = float(digits)
                except ValueError:
                    pass
            return

        if awaiting not in ('priceCurrency', 'availability'):
            return

        text = data.strip()
        if text:
            target = 'currency' if awaiting == 'priceCurrency' else 'availability'
            self.microdata[target] = text

    def handle_endtag(self, tag):
        # Any closing tag ends the wait for text content
        self.current_itemprop = None
 def extract_metadata_pure_python(html_content):
    """
    Run the three pure-Python extractors over an HTML document.

    Each extractor runs independently; if one of them raises, the failure is
    logged at debug level and its entry keeps the empty default.

    Args:
        html_content: HTML string to parse

    Returns:
        dict: with the keys
            - 'json-ld': List of parsed JSON-LD objects
            - 'opengraph': Dict of OpenGraph properties
            - 'microdata': Dict of microdata properties
    """
    metadata = {
        'json-ld': [],
        'opengraph': {},
        'microdata': {}
    }

    # --- JSON-LD ---
    try:
        jsonld_parser = JSONLDExtractor()
        jsonld_parser.feed(html_content)
        metadata['json-ld'] = jsonld_parser.data
        logger.trace(f"Pure Python: Found {len(jsonld_parser.data)} JSON-LD blocks")
    except Exception as e:
        logger.debug(f"JSON-LD extraction failed: {e}")

    # --- OpenGraph ---
    try:
        og_parser = OpenGraphExtractor()
        og_parser.feed(html_content)
        metadata['opengraph'] = og_parser.og_data
        # Only log when something was actually found
        if metadata['opengraph']:
            logger.trace(f"Pure Python: Found {len(og_parser.og_data)} OpenGraph tags")
    except Exception as e:
        logger.debug(f"OpenGraph extraction failed: {e}")

    # --- Microdata ---
    try:
        microdata_parser = MicrodataExtractor()
        microdata_parser.feed(html_content)
        metadata['microdata'] = microdata_parser.microdata
        # Only log when something was actually found
        if metadata['microdata']:
            logger.trace(f"Pure Python: Found microdata: {metadata['microdata']}")
    except Exception as e:
        logger.debug(f"Microdata extraction failed: {e}")

    return metadata
 def _coerce_price(value):
    """Return ``value`` as a float price, or None when it cannot be read as one."""
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        # Keep digits and dots only (drops currency symbols, thousands commas, spaces)
        try:
            return float(re.sub(r'[^\d.]', '', value))
        except ValueError:
            return None
    return None

 def query_price_availability(extracted_data):
    """
    Query extracted metadata for price and availability information.

    Sources are consulted in order of reliability; a later source only fills
    in keys that an earlier one did not provide:
      1. JSON-LD, queried with jsonpath_ng (first block that yields a price wins)
      2. OpenGraph (og:price:amount, og:price:currency, og:availability)
      3. Microdata

    jsonpath_ng is imported only when there is at least one JSON-LD block.

    Args:
        extracted_data: Dict from extract_metadata_pure_python()

    Returns:
        dict: {'price': float, 'currency': str, 'availability': str}; a key is
        absent when no source provided a usable value for it
    """
    result = {}

    # 1. Try JSON-LD first (most reliable and common)
    jsonld_blocks = extracted_data.get('json-ld', [])
    if jsonld_blocks:
        from jsonpath_ng import parse
        # Compile the expressions once instead of once per JSON-LD block
        price_parse = parse('$..(price|Price)')
        availability_parse = parse('$..(availability|Availability)')
        currency_parse = parse('$..(priceCurrency|currency)')

    for data in jsonld_blocks:
        try:
            # Use jsonpath to find price/availability anywhere in the structure
            price_results = [m.value for m in price_parse.find(data)]
            if price_results and not result.get('price'):
                # Handle various price formats (number or string)
                price = _coerce_price(price_results[0])
                if price is not None:
                    result['price'] = price

            avail_results = [m.value for m in availability_parse.find(data)]
            if avail_results and not result.get('availability'):
                result['availability'] = str(avail_results[0])

            curr_results = [m.value for m in currency_parse.find(data)]
            if curr_results and not result.get('currency'):
                result['currency'] = str(curr_results[0])

            # If we found price, this JSON-LD block is good
            if result.get('price'):
                logger.debug(f"Pure Python: Found price data in JSON-LD: {result}")
                break
        except Exception as e:
            logger.debug(f"Error querying JSON-LD: {e}")
            continue

    # 2. Try OpenGraph if JSON-LD didn't provide everything
    og_data = extracted_data.get('opengraph', {})
    if not result.get('price') and 'og:price:amount' in og_data:
        try:
            result['price'] = float(og_data['og:price:amount'])
        except ValueError:
            pass
    if not result.get('currency') and 'og:price:currency' in og_data:
        result['currency'] = og_data['og:price:currency']
    if not result.get('availability') and 'og:availability' in og_data:
        result['availability'] = og_data['og:availability']

    # 3. Use microdata as last resort
    microdata = extracted_data.get('microdata', {})
    if not result.get('price') and 'price' in microdata:
        # Microdata prices read from a content attribute arrive as raw strings;
        # normalise them so 'price' is a float regardless of the source.
        price = _coerce_price(microdata['price'])
        if price is not None:
            result['price'] = price
    if not result.get('currency') and 'currency' in microdata:
        result['currency'] = microdata['currency']
    if not result.get('availability') and 'availability' in microdata:
        result['availability'] = microdata['availability']

    return result
--- a/changedetectionio/store/init.py
+++ b/changedetectionio/store/init.py
@@ -33,9 +33,8 @@ except ImportError:
 from ..processors import get_custom_watch_obj_for_processor
 # Import the base class and helpers
-from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_watch_atomic, save_tag_atomic, save_json_atomic
+from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_json_atomic
 from .updates import DatastoreUpdatesMixin
 from .legacy_loader import has_legacy_datastore
 # Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
 BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
@@ -78,7 +77,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.")
            copyfile(db_path, db_path_version_backup)
-    def _load_settings(self):
+    def _load_settings(self, filename="changedetection.json"):
        """
        Load settings from storage.
@@ -87,7 +86,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        Returns:
            dict: Settings data loaded from storage
        """
-        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
+        changedetection_json = os.path.join(self.datastore_path, filename)
        logger.info(f"Loading settings from {changedetection_json}")
@@ -122,6 +121,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            if 'application' in settings_data['settings']:
                self.__data['settings']['application'].update(settings_data['settings']['application'])
        # More or less for the old format which had this data in the one url-watches.json
        # cant hurt to leave it here,
        if 'watching' in settings_data:
            self.__data['watching'].update(settings_data['watching'])
    def _rehydrate_tags(self):
        """Rehydrate tag entities from stored data into Tag objects with restock_diff processor."""
        from ..model import Tag
@@ -146,23 +150,28 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        logger.info(f"Rehydrating {watch_count} watches...")
        watching_rehydrated = {}
        for uuid, watch_dict in self.__data.get('watching', {}).items():
-            watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
+            if isinstance(watch_dict, dict):
                watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
            else:
                logger.error(f"Watch UUID {uuid} already rehydrated")
        self.__data['watching'] = watching_rehydrated
        logger.success(f"Rehydrated {watch_count} watches into Watch objects")
-    def _load_state(self):
+    def _load_state(self, main_settings_filename="changedetection.json"):
        """
        Load complete datastore state from storage.
        Orchestrates loading of settings, watches, and tags using polymorphic methods.
        """
        # Load settings
-        settings_data = self._load_settings()
+        settings_data = self._load_settings(filename=main_settings_filename)
        self._apply_settings(settings_data)
-        # Load watches (polymorphic - parent class method)
+        # Load watches, scan them from the disk
        self._load_watches()
        self._rehydrate_watches()
        # Load tags from individual tag.json files
        # These will override any tags in settings (migration path)
@@ -200,112 +209,73 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        # Check if datastore already exists
        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
        changedetection_json_old_schema = os.path.join(self.datastore_path, "url-watches.json")
        if os.path.exists(changedetection_json):
            # Load existing datastore (changedetection.json + watch.json files)
            logger.info("Loading existing datastore")
            try:
                self._load_state()
            except Exception as e:
                logger.critical(f"Failed to load datastore: {e}")
                raise
            # Run schema updates if needed
            # Pass current schema version from loaded datastore (defaults to 0 if not set)
            # Load existing datastore (changedetection.json + watch.json files)
            logger.info("Loading existing datastore")
            self._load_state()
            current_schema = self.data['settings']['application'].get('schema_version', 0)
            self.run_updates(current_schema_version=current_schema)
        # Legacy datastore detected - trigger migration, even works if the schema is much before the migration step.
        elif os.path.exists(changedetection_json_old_schema):
            logger.critical(f"Legacy datastore detected at {changedetection_json_old_schema}, loading and running updates")
            self._load_state(main_settings_filename="url-watches.json")
            # update 26 will load the whole old config from disk to __data
            current_schema = self.__data['settings']['application'].get('schema_version', 0)
            self.run_updates(current_schema_version=current_schema)
            # Probably tags were also shifted to disk and many other changes, so best to reload here.
            self._load_state()
        else:
            # No datastore yet - check if this is a fresh install or legacy migration
-            # Generate app_guid FIRST (required for all operations)
+            self.init_fresh_install(include_default_watches=include_default_watches,
-            if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
+                                    version_tag=version_tag)
                self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
            else:
                self.__data['app_guid'] = str(uuid_builder.uuid4())
-            # Generate RSS access token
+    def init_fresh_install(self, include_default_watches, version_tag):
-            self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
+      # Generate app_guid FIRST (required for all operations)
        if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
            self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
        else:
            self.__data['app_guid'] = str(uuid_builder.uuid4())
-            # Generate API access token
+        # Generate RSS access token
-            self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
+        self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
-            # Check if legacy datastore exists (url-watches.json)
+        # Generate API access token
-            if has_legacy_datastore(self.datastore_path):
+        self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
-                # Legacy datastore detected - trigger migration
+        logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
                logger.critical(f"Legacy datastore detected at {self.datastore_path}/url-watches.json")
                logger.critical("Migration will be triggered via update_26")
-                # Load the legacy datastore
+        # Set schema version to latest (no updates needed)
-                from .legacy_loader import load_legacy_format
+        latest_update_available = self.get_updates_available().pop()
-                legacy_path = os.path.join(self.datastore_path, "url-watches.json")
+        logger.info(f"Marking fresh install to schema version {latest_update_available}")
-                legacy_data = load_legacy_format(legacy_path)
+        self.__data['settings']['application']['schema_version'] = latest_update_available
-                if not legacy_data:
+        # Add default watches if requested
-                    raise Exception("Failed to load legacy datastore from url-watches.json")
+        if include_default_watches:
            self.add_watch(
                url='https://news.ycombinator.com/',
                tag='Tech news',
                extras={'fetch_backend': 'html_requests'}
            )
            self.add_watch(
                url='https://changedetection.io/CHANGELOG.txt',
                tag='changedetection.io',
                extras={'fetch_backend': 'html_requests'}
            )
-                # Merge legacy data with base_config defaults (preserves new fields like 'ui')
+        # Create changedetection.json immediately
-                # self.__data already has App.model() defaults from line 190
+        try:
-                logger.info("Merging legacy data with base_config defaults...")
+            self._save_settings()
-
+            logger.info("Created changedetection.json for new datastore")
-                # Apply top-level fields from legacy data
+        except Exception as e:
-                if 'app_guid' in legacy_data:
+            logger.error(f"Failed to create initial changedetection.json: {e}")
                    self.__data['app_guid'] = legacy_data['app_guid']
                if 'build_sha' in legacy_data:
                    self.__data['build_sha'] = legacy_data['build_sha']
                if 'version_tag' in legacy_data:
                    self.__data['version_tag'] = legacy_data['version_tag']
                # Apply watching data (complete replacement as these are user's watches)
                if 'watching' in legacy_data:
                    self.__data['watching'] = legacy_data['watching']
                # Merge settings sections (preserves base_config defaults for missing fields)
                if 'settings' in legacy_data:
                    if 'headers' in legacy_data['settings']:
                        self.__data['settings']['headers'].update(legacy_data['settings']['headers'])
                    if 'requests' in legacy_data['settings']:
                        self.__data['settings']['requests'].update(legacy_data['settings']['requests'])
                    if 'application' in legacy_data['settings']:
                        # CRITICAL: Use .update() to merge, not replace
                        # This preserves new fields like 'ui' that exist in base_config
                        self.__data['settings']['application'].update(legacy_data['settings']['application'])
                # CRITICAL: Rehydrate watches from dicts into Watch objects
                # This ensures watches have their methods available during migration
                self._rehydrate_watches()
                # update_26 will save watches to individual files and create changedetection.json
                # Next startup will load from new format normally
                self.run_updates()
            else:
                # Fresh install - create new datastore
                logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
                # Set schema version to latest (no updates needed)
                updates_available = self.get_updates_available()
                self.__data['settings']['application']['schema_version'] = updates_available.pop() if updates_available else 26
                # Add default watches if requested
                if include_default_watches:
                    self.add_watch(
                        url='https://news.ycombinator.com/',
                        tag='Tech news',
                        extras={'fetch_backend': 'html_requests'}
                    )
                    self.add_watch(
                        url='https://changedetection.io/CHANGELOG.txt',
                        tag='changedetection.io',
                        extras={'fetch_backend': 'html_requests'}
                    )
                # Create changedetection.json immediately
                try:
                    self._save_settings()
                    logger.info("Created changedetection.json for new datastore")
                except Exception as e:
                    logger.error(f"Failed to create initial changedetection.json: {e}")
        # Set version tag
        self.__data['version_tag'] = version_tag
@@ -383,17 +353,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        # Deep copy settings to avoid modifying the original
        settings_copy = copy.deepcopy(self.__data['settings'])
        # Only exclude tags if we've already migrated them to individual files (schema >= 28)
        # This ensures update_28 can migrate tags from settings
        schema_version = self.__data['settings']['application'].get('schema_version', 0)
        if schema_version >= 28:
            # Tags are in individual tag.json files, don't save to settings
            settings_copy['application']['tags'] = {}
        # else: keep tags in settings for update_28 migration
        return {
            'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
-            'app_guid': self.__data['app_guid'],
+            'app_guid': self.__data.get('app_guid'),
            'settings': settings_copy,
            'build_sha': self.__data.get('build_sha'),
            'version_tag': self.__data.get('version_tag')
@@ -422,15 +384,14 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        Implementation of abstract method from FileSavingDataStore.
        Delegates to helper function and stores results in internal data structure.
        """
        watching = load_all_watches(
            self.datastore_path,
            self.rehydrate_entity
        )
        # Store loaded data
-        self.__data['watching'] = watching
+        # @note this will also work for the old legacy format because self.__data['watching'] should already have them loaded by this point.
-
+        self.__data['watching'].update(load_all_watches(
-        logger.debug(f"Loaded {len(watching)} watches")
+            self.datastore_path,
            self.rehydrate_entity
        ))
        logger.debug(f"Loaded {len(self.__data['watching'])} watches")
    def _load_tags(self):
        """
--- a/changedetectionio/store/file_saving_datastore.py
+++ b/changedetectionio/store/file_saving_datastore.py
@@ -207,15 +207,6 @@ def save_watch_atomic(watch_dir, uuid, watch_dict):
    save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)
 def save_tag_atomic(tag_dir, uuid, tag_dict):
    """
    Persist a single tag to disk as tag.json.
    Thin backwards-compatibility shim: all of the work is delegated to
    save_entity_atomic with the tag-specific filename, entity type and 1 MB size cap.
    """
    save_entity_atomic(
        tag_dir,
        uuid,
        tag_dict,
        filename="tag.json",
        entity_type="tag",
        max_size_mb=1,
    )
 def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
    """
--- a/changedetectionio/store/legacy_loader.py
+++ b/changedetectionio/store/legacy_loader.py
@@ -1,66 +0,0 @@
 """
 Legacy format loader for url-watches.json.
 Provides functions to detect and load from the legacy monolithic JSON format.
 Used during migration (update_26) to transition to individual watch.json files.
 """
 import os
 import json
 from loguru import logger
 # Try to import orjson for faster JSON serialization
 try:
    import orjson
    HAS_ORJSON = True
 except ImportError:
    HAS_ORJSON = False
 def has_legacy_datastore(datastore_path):
    """
    Report whether the legacy monolithic datastore file is present.
    Callers use the answer to decide whether a migration from url-watches.json is needed.
    Args:
        datastore_path: Path to datastore directory
    Returns:
        bool: True when "url-watches.json" exists directly inside datastore_path
    """
    return os.path.exists(os.path.join(datastore_path, "url-watches.json"))
 def load_legacy_format(json_store_path):
    """
    Read a legacy url-watches.json datastore file and return its parsed content.
    The file is parsed with orjson when HAS_ORJSON is set, otherwise with the
    standard json module (UTF-8 text).
    Args:
        json_store_path: Full path to url-watches.json file
    Returns:
        dict: Parsed datastore data ('watching', 'settings', etc.)
        None: If the path is not a regular file, or reading/parsing raised any exception
    """
    logger.info(f"Loading from legacy format: {json_store_path}")
    # Guard: nothing to load when the legacy file is absent
    if not os.path.isfile(json_store_path):
        logger.warning(f"Legacy file not found: {json_store_path}")
        return None
    try:
        if not HAS_ORJSON:
            with open(json_store_path, 'r', encoding='utf-8') as fh:
                parsed = json.load(fh)
        else:
            with open(json_store_path, 'rb') as fh:
                parsed = orjson.loads(fh.read())
        # Kept inside the try: a payload without .get() is reported as a load failure too
        logger.info(f"Loaded {len(parsed.get('watching', {}))} watches from legacy format")
    except Exception as err:
        logger.error(f"Failed to load legacy format: {err}")
        return None
    return parsed
--- a/changedetectionio/store/updates.py
+++ b/changedetectionio/store/updates.py
@@ -16,12 +16,18 @@ import time
 from loguru import logger
 from copy import deepcopy
 # Try to import orjson for faster JSON serialization
 try:
    import orjson
    HAS_ORJSON = True
 except ImportError:
    HAS_ORJSON = False
 from ..html_tools import TRANSLATE_WHITESPACE_TABLE
 from ..processors.restock_diff import Restock
 from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
 from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
 from .file_saving_datastore import save_watch_atomic
 def create_backup_tarball(datastore_path, update_number):
    """
@@ -97,7 +103,7 @@ def create_backup_tarball(datastore_path, update_number):
                    tar.add(tag_json, arcname=f"{entry}/tag.json")
                    tag_count += 1
-            logger.success(f"Backup created: {backup_filename} ({watch_count} watches, {tag_count} tags)")
+            logger.success(f"Backup created: {backup_filename} ({watch_count} watches from disk, {tag_count} tags from disk)")
            return backup_path
    except Exception as e:
@@ -137,6 +143,7 @@ class DatastoreUpdatesMixin:
        return updates_available
    def run_updates(self, current_schema_version=None):
        import sys
        """
        Run all pending schema updates sequentially.
@@ -160,6 +167,23 @@ class DatastoreUpdatesMixin:
        4. All changes saved via individual .commit() calls
        """
        updates_available = self.get_updates_available()
        if self.data.get('watching'):
            test_watch = self.data['watching'].get(next(iter(self.data.get('watching', {}))))
            from ..model.Watch import model
            if not isinstance(test_watch, model):
                import sys
                logger.critical("Cannot run updates! Watch structure must be re-hydrated back to a Watch model object!")
                sys.exit(1)
        if self.data['settings']['application'].get('tags',{}):
            test_tag = self.data['settings']['application'].get('tags',{}).get(next(iter(self.data['settings']['application'].get('tags',{}))))
            from ..model.Tag import model as tag_model
            if not isinstance(test_tag, tag_model):
                import sys
                logger.critical("Cannot run updates! Watch tag/group structure must be re-hydrated back to a Tag model object!")
                sys.exit(1)
        # Determine current schema version
        if current_schema_version is None:
@@ -201,10 +225,9 @@ class DatastoreUpdatesMixin:
                try:
                    update_method = getattr(self, f"update_{update_n}")()
                except Exception as e:
-                    logger.error(f"Error while trying update_{update_n}")
+                    logger.critical(f"Error while trying update_{update_n}")
-                    logger.error(e)
+                    logger.exception(e)
-                    # Don't run any more updates
+                    sys.exit(1)
                    return
                else:
                    # Bump the version
                    self.data['settings']['application']['schema_version'] = update_n
@@ -555,27 +578,6 @@ class DatastoreUpdatesMixin:
        logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
        logger.critical("=" * 80)
        # Check if already migrated
        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
        if os.path.exists(changedetection_json):
            logger.info("Migration already completed (changedetection.json exists), skipping")
            return
        # Check if we need to load legacy data
        from .legacy_loader import has_legacy_datastore, load_legacy_format
        if not has_legacy_datastore(self.datastore_path):
            logger.info("No legacy datastore found, nothing to migrate")
            return
        # Load legacy data from url-watches.json
        logger.critical("Loading legacy datastore from url-watches.json...")
        legacy_path = os.path.join(self.datastore_path, "url-watches.json")
        legacy_data = load_legacy_format(legacy_path)
        if not legacy_data:
            raise Exception("Failed to load legacy datastore from url-watches.json")
        # Populate settings from legacy data
        logger.info("Populating settings from legacy data...")
        watch_count = len(self.data['watching'])
@@ -587,9 +589,7 @@ class DatastoreUpdatesMixin:
        saved_count = 0
        for uuid, watch in self.data['watching'].items():
            try:
-                watch_dict = dict(watch)
+                watch.commit()
                watch_dir = os.path.join(self.datastore_path, uuid)
                save_watch_atomic(watch_dir, uuid, watch_dict)
                saved_count += 1
                if saved_count % 100 == 0:
@@ -635,18 +635,19 @@ class DatastoreUpdatesMixin:
        # Phase 4: Verify settings file exists
        logger.critical("Phase 4/4: Verifying changedetection.json exists...")
        changedetection_json_new_schema=os.path.join(self.datastore_path, "changedetection.json")
        if not os.path.isfile(changedetection_json_new_schema):
            import sys
            logger.critical("Migration failed, changedetection.json not found after update ran!")
            sys.exit(1)
        if not os.path.isfile(changedetection_json):
            raise Exception(
                "Migration failed: changedetection.json not found after save. "
                "url-watches.json remains intact, safe to retry."
            )
        logger.critical("Phase 4 complete: Verified changedetection.json exists")
        # Success! Now reload from new format
        logger.critical("Reloading datastore from new format...")
-        self._load_state() # Includes load_watches
+        # write it to disk, it will be saved without ['watching'] in the JSON db because we find it from disk glob
        self._save_settings()
        logger.success("Datastore reloaded from new format successfully")
        logger.critical("=" * 80)
        logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
@@ -681,9 +682,11 @@ class DatastoreUpdatesMixin:
        - Enables independent tag versioning/backup
        - Maintains backwards compatibility (tags stay in settings too)
        """
        # Force save as tag.json (not watch.json) even if object is corrupted
        logger.critical("=" * 80)
        logger.critical("Running migration: Individual tag persistence (update_28)")
-        logger.critical("Creating individual tag.json files (tags remain in settings too)")
+        logger.critical("Creating individual tag.json files")
        logger.critical("=" * 80)
        tags = self.data['settings']['application'].get('tags', {})
@@ -700,27 +703,8 @@ class DatastoreUpdatesMixin:
        for uuid, tag_data in tags.items():
            try:
-                # Force save as tag.json (not watch.json) even if object is corrupted
+                tag_data.commit()
                from changedetectionio.store.file_saving_datastore import save_entity_atomic
                import os
                tag_dir = os.path.join(self.datastore_path, uuid)
                os.makedirs(tag_dir, exist_ok=True)
                # Convert to dict if it's an object
                tag_dict = dict(tag_data) if hasattr(tag_data, '__iter__') else tag_data
                # Save explicitly as tag.json
                save_entity_atomic(
                    tag_dir,
                    uuid,
                    tag_dict,
                    filename='tag.json',
                    entity_type='tag',
                    max_size_mb=1
                )
                saved_count += 1
                if saved_count % 10 == 0:
                    logger.info(f"  Progress: {saved_count}/{tag_count} tags migrated...")
@@ -737,5 +721,5 @@ class DatastoreUpdatesMixin:
        # On next load, _load_tags() will read from tag.json files and merge with settings
        logger.info("Tags saved to both settings AND individual tag.json files")
        logger.info("Future tag edits will update both locations (dual storage)")
        logger.critical("=" * 80)
        logger.critical("=" * 80)
--- a/changedetectionio/tests/test_api.py
+++ b/changedetectionio/tests/test_api.py
@@ -328,6 +328,68 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
    )
    assert len(res.json) == 0, "Watch list should be empty"
 def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
    """
    Full API round trip: create a watch, GET it, then PUT the fetched document straight back.
    This checks that the watch as returned by the API is accepted again on update, and that
    overwriting the 'readOnly' fields ('last_changed', 'date_created') in the PUT body does
    not change the values reported by a later GET.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Create new
    created = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": test_url}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    assert created.status_code == 201
    uuid = created.json.get('uuid')
    watch_endpoint = url_for("watch", uuid=uuid)
    # Now fetch it and send it back
    watch = client.get(
        watch_endpoint,
        headers={'x-api-key': api_key}
    ).json
    # Be sure that 'readOnly' values are never updated in the real watch
    bogus_timestamp = 454444444444
    read_only_fields = ('last_changed', 'date_created')
    for field_name in read_only_fields:
        watch[field_name] = bogus_timestamp
    # HTTP PUT ( UPDATE an existing watch )
    updated = client.put(
        watch_endpoint,
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps(watch),
    )
    if updated.status_code != 200:
        print(f"\n=== PUT failed with {updated.status_code} ===")
        print(f"Error: {updated.data}")
    assert updated.status_code == 200, "HTTP PUT update was sent OK"
    # Re-read the watch: neither the integer nor its string form may have been stored
    refetched = client.get(
        watch_endpoint,
        headers={'x-api-key': api_key}
    )
    for field_name in read_only_fields:
        stored_value = refetched.json.get(field_name)
        assert stored_value != bogus_timestamp
        assert stored_value != "454444444444"
 def test_access_denied(client, live_server, measure_memory_usage, datastore_path):
    # `config_api_token_enabled` Should be On by default
    res = client.get(
@@ -401,6 +463,9 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto
        follow_redirects=True
    )
    if res.status_code != 201:
        print(f"\n=== POST createwatch failed with {res.status_code} ===")
        print(f"Response: {res.data}")
    assert res.status_code == 201
    wait_for_all_checks(client)
@@ -464,11 +529,12 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto
    )
    assert res.status_code == 400, "Should get error 400 when we give a field that doesnt exist"
-    # Message will come from `flask_expects_json`
+    # Backend validation now rejects unknown fields with a clear error message
-    # With patternProperties for processor_config_*, the error message format changed slightly
+    assert (b'Unknown field' in res.data or
-    assert (b'Additional properties are not allowed' in res.data or
+            b'Additional properties are not allowed' in res.data or
            b'Unevaluated properties are not allowed' in res.data or
            b'does not match any of the regexes' in res.data), \
-            "Should reject unknown fields with schema validation error"
+            "Should reject unknown fields with validation error"
    # Try a XSS URL
@@ -553,6 +619,8 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
    assert res.status_code == 200
    uuid = res.json[0]
    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    assert isinstance(watch['notification_urls'], list), "notification_urls must be stored as a list"
    assert len(watch['notification_urls']) == 2, "notification_urls should have 2 entries"
    assert 'mailto://test@example.com' in watch['notification_urls'], "notification_urls should contain first email"
    assert 'mailto://admin@example.com' in watch['notification_urls'], "notification_urls should contain second email"
@@ -599,6 +667,34 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
    assert res.status_code == 400, "Should reject unknown field"
    assert b"Unknown watch configuration parameter" in res.data, "Error message should mention unknown parameter"
    # Test 7: Import with complex nested array (browser_steps) - array of objects
    browser_steps = json.dumps([
        {"operation": "wait", "selector": "5", "optional_value": ""},
        {"operation": "click", "selector": "button.submit", "optional_value": ""}
    ])
    params = urllib.parse.urlencode({
        'tag': 'browser-test',
        'browser_steps': browser_steps
    })
    res = client.post(
        url_for("import") + "?" + params,
        data='https://website8.com',
        headers={'x-api-key': api_key},
        follow_redirects=True
    )
    assert res.status_code == 200, "Should accept browser_steps array"
    uuid = res.json[0]
    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    assert len(watch['browser_steps']) == 2, "Should have 2 browser steps"
    assert watch['browser_steps'][0]['operation'] == 'wait', "First step should be wait"
    assert watch['browser_steps'][1]['operation'] == 'click', "Second step should be click"
    assert watch['browser_steps'][1]['selector'] == 'button.submit', "Second step selector should be button.submit"
    # Cleanup
    delete_all_watches(client)
 def test_api_import_small_synchronous(client, live_server, measure_memory_usage, datastore_path):
    """Test that small imports (< threshold) are processed synchronously"""
@@ -837,7 +933,9 @@ def test_api_url_validation(client, live_server, measure_memory_usage, datastore
    )
    assert res.status_code == 400, "Updating watch URL to null should fail"
    # Accept either OpenAPI validation error or our custom validation error
-    assert b'URL cannot be null' in res.data or b'OpenAPI validation failed' in res.data or b'validation error' in res.data.lower()
+    assert (b'URL cannot be null' in res.data or
            b'Validation failed' in res.data or
            b'validation error' in res.data.lower())
    # Test 8: UPDATE to empty string URL should fail
    res = client.put(
@@ -924,3 +1022,140 @@ def test_api_url_validation(client, live_server, measure_memory_usage, datastore
        headers={'x-api-key': api_key},
    )
    delete_all_watches(client)
 def test_api_time_between_check_validation(client, live_server, measure_memory_usage, datastore_path):
    """
    Exercise the createwatch endpoint's validation of the re-check interval:
    - With time_between_check_use_default set to false, the request must be rejected (400)
      unless at least one time_between_check value is greater than zero
    - A non-integer interval value must be rejected (400)
    - With time_between_check_use_default true, or omitted, no interval is required (201)
    """
    import json
    from flask import url_for
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    def create_watch(payload):
        # POST the payload as JSON to createwatch; a fresh header dict is built per request
        return client.post(
            url_for("createwatch"),
            data=json.dumps(payload),
            headers={'content-type': 'application/json', 'x-api-key': api_key},
        )
    # Tests 1-3: payloads that must be refused, each paired with its assertion message
    rejected_cases = (
        # Test 1: time_between_check_use_default=false with NO time_between_check should fail
        (
            {
                "url": "https://example.com",
                "time_between_check_use_default": False
            },
            "Should fail when time_between_check_use_default=false with no time_between_check",
        ),
        # Test 2: time_between_check_use_default=false with ALL zeros should fail
        (
            {
                "url": "https://example.com",
                "time_between_check_use_default": False,
                "time_between_check": {
                    "weeks": 0,
                    "days": 0,
                    "hours": 0,
                    "minutes": 0,
                    "seconds": 0
                }
            },
            "Should fail when all time values are 0",
        ),
        # Test 3: time_between_check_use_default=false with NULL values should fail
        (
            {
                "url": "https://example.com",
                "time_between_check_use_default": False,
                "time_between_check": {
                    "weeks": None,
                    "days": None,
                    "hours": None,
                    "minutes": None,
                    "seconds": None
                }
            },
            "Should fail when all time values are null",
        ),
    )
    for payload, failure_note in rejected_cases:
        res = create_watch(payload)
        assert res.status_code == 400, failure_note
        assert b"At least one time interval" in res.data, "Error message should mention time interval requirement"
    # Tests 4-7: payloads that must be accepted; the created UUIDs are kept for cleanup
    accepted_cases = (
        # Test 4: time_between_check_use_default=false with valid hours should succeed
        (
            {
                "url": "https://example.com",
                "time_between_check_use_default": False,
                "time_between_check": {
                    "hours": 2
                }
            },
            "Should succeed with valid hours value",
        ),
        # Test 5: time_between_check_use_default=false with valid minutes should succeed
        (
            {
                "url": "https://example2.com",
                "time_between_check_use_default": False,
                "time_between_check": {
                    "minutes": 30
                }
            },
            "Should succeed with valid minutes value",
        ),
        # Test 6: time_between_check_use_default=true (or missing) with no time_between_check should succeed (uses defaults)
        (
            {
                "url": "https://example3.com",
                "time_between_check_use_default": True
            },
            "Should succeed when using default settings",
        ),
        # Test 7: Default behavior (no time_between_check_use_default field) should use defaults and succeed
        (
            {
                "url": "https://example4.com"
            },
            "Should succeed with default behavior (using global settings)",
        ),
    )
    created_uuids = []
    for payload, failure_note in accepted_cases:
        res = create_watch(payload)
        assert res.status_code == 201, failure_note
        created_uuids.append(res.json.get('uuid'))
    # Test 8: Verify integer type validation - string should fail (OpenAPI validation)
    res = create_watch({
        "url": "https://example5.com",
        "time_between_check_use_default": False,
        "time_between_check": {
            "hours": "not_a_number"
        }
    })
    assert res.status_code == 400, "Should fail when time value is not an integer"
    assert b"Validation failed" in res.data or b"not of type" in res.data, "Should mention validation/type error"
    # Cleanup
    for uuid in created_uuids:
        client.delete(
            url_for("watch", uuid=uuid),
            headers={'x-api-key': api_key},
        )
--- a/changedetectionio/tests/test_api_notification_urls_validation.py
+++ b/changedetectionio/tests/test_api_notification_urls_validation.py
@@ -107,7 +107,7 @@ def test_watch_notification_urls_validation(client, live_server, measure_memory_
        headers={'content-type': 'application/json', 'x-api-key': api_key}
    )
    assert res.status_code == 400, "Should reject non-list notification_urls"
-    assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
+    assert b"Validation failed" in res.data or b"is not of type" in res.data
    # Test 6: Verify original URLs are preserved after failed update
    res = client.get(
@@ -159,7 +159,7 @@ def test_tag_notification_urls_validation(client, live_server, measure_memory_us
        headers={'content-type': 'application/json', 'x-api-key': api_key}
    )
    assert res.status_code == 400, "Should reject non-list notification_urls"
-    assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
+    assert b"Validation failed" in res.data or b"is not of type" in res.data
    # Test 4: Verify original URLs are preserved after failed update
    tag = datastore.data['settings']['application']['tags'][tag_uuid]
--- a/changedetectionio/tests/test_api_openapi.py
+++ b/changedetectionio/tests/test_api_openapi.py
@@ -26,7 +26,7 @@ def test_openapi_validation_invalid_content_type_on_create_watch(client, live_se
    # Should get 400 error due to OpenAPI validation failure
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
+    assert b"Validation failed" in res.data, "Should contain validation error message"
 def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage, datastore_path):
@@ -43,7 +43,7 @@ def test_openapi_validation_missing_required_field_create_watch(client, live_ser
    # Should get 400 error due to missing required field
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
+    assert b"Validation failed" in res.data, "Should contain validation error message"
 def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage, datastore_path):
@@ -80,10 +80,9 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m
    # Should get 400 error due to invalid field (this will be caught by internal validation)
    # Note: This tests the flow where OpenAPI validation passes but internal validation catches it
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    # With patternProperties for processor_config_*, the error message format changed slightly
+    # Backend validation now returns "Unknown field(s):" message
-    assert (b"Additional properties are not allowed" in res.data or
+    assert b"Unknown field" in res.data, \
-            b"does not match any of the regexes" in res.data), \
+            "Should contain validation error about unknown fields"
            "Should contain validation error about additional/invalid properties"
 def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
@@ -100,7 +99,7 @@ def test_openapi_validation_import_wrong_content_type(client, live_server, measu
    # Should get 400 error due to content-type mismatch
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
+    assert b"Validation failed" in res.data, "Should contain validation error message"
 def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage, datastore_path):
@@ -158,7 +157,7 @@ def test_openapi_validation_create_tag_missing_required_title(client, live_serve
    # Should get 400 error due to missing required field
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
+    assert b"Validation failed" in res.data, "Should contain validation error message"
 def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage, datastore_path):
--- a/changedetectionio/tests/test_api_tags.py
+++ b/changedetectionio/tests/test_api_tags.py
@@ -176,4 +176,57 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
    assert res.status_code == 204
 def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
    """
    Round-trip a tag through the API: create it, GET it, then PUT the same
    JSON straight back. This checks that the default model returned by GET is
    accepted again by the update endpoint, and that a tampered read-only
    `date_created` value is not stored.

    :param client: test client used to issue the POST/GET/PUT requests
    :param live_server: provides `app.config['DATASTORE']`, from which the API key is read
    :param measure_memory_usage: not referenced in the body (fixture is only requested)
    :param datastore_path: forwarded to `set_original_response()`
    :return: None
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    # Create a tag to work with; only the title is supplied
    res = client.post(
        url_for("tag"),
        data=json.dumps({"title": "My tag title"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key}
    )
    assert res.status_code == 201
    uuid = res.json.get('uuid')
    # Now fetch it and send it back
    res = client.get(
        url_for("tag", uuid=uuid),
        headers={'x-api-key': api_key}
    )
    tag = res.json
    # Only test with date_created (readOnly field that should be filtered out)
    # last_changed is Watch-specific and doesn't apply to Tags
    tag['date_created'] = 454444444444
    # HTTP PUT (update the existing tag) with the full, unmodified-except-date_created body
    res = client.put(
        url_for("tag", uuid=uuid),
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps(tag),
    )
    # Dump the response body before asserting so a failing run shows the server's error
    if res.status_code != 200:
        print(f"\n=== PUT failed with {res.status_code} ===")
        print(f"Error: {res.data}")
    assert res.status_code == 200, "HTTP PUT update was sent OK"
    # Verify readOnly fields like date_created cannot be overridden
    res = client.get(
        url_for("tag", uuid=uuid),
        headers={'x-api-key': api_key}
    )
    date_created = res.json.get('date_created')
    # Compare against both the integer and the string form of the injected value
    assert date_created != 454444444444, "ReadOnly date_created should not be updateable"
    assert date_created != "454444444444", "ReadOnly date_created should not be updateable"
--- a/changedetectionio/tests/test_group.py
+++ b/changedetectionio/tests/test_group.py
@@ -5,6 +5,8 @@ from flask import url_for
 from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
 import os
 from ..store import ChangeDetectionStore
 # def test_setup(client, live_server, measure_memory_usage, datastore_path):
   #  live_server_setup(live_server) # Setup on conftest per function
@@ -487,7 +489,6 @@ def test_tag_json_persistence(client, live_server, measure_memory_usage, datasto
    - Tag deletion removes tag.json file
    """
    import json
    from changedetectionio.store import ChangeDetectionStore
    datastore = client.application.config.get('DATASTORE')
@@ -569,9 +570,6 @@ def test_tag_json_migration_update_27(client, live_server, measure_memory_usage,
    This simulates a pre-update_27 datastore and verifies migration works.
    """
    import json
    from changedetectionio.store import ChangeDetectionStore
    datastore = client.application.config.get('DATASTORE')
    # 1. Create multiple tags
    tag_names = ['migration-tag-1', 'migration-tag-2', 'migration-tag-3']
--- a/docs/api-spec.yaml
+++ b/docs/api-spec.yaml
@@ -28,7 +28,7 @@ info:
    For example: `x-api-key: YOUR_API_KEY`
-  version: 0.1.5
+  version: 0.1.6
  contact:
    name: ChangeDetection.io
    url: https://github.com/dgtlmoon/changedetection.io
@@ -126,13 +126,22 @@ components:
    WatchBase:
      type: object
      properties:
        uuid:
          type: string
          format: uuid
          description: Unique identifier
          readOnly: true
        date_created:
          type: [integer, 'null']
          description: Unix timestamp of creation
          readOnly: true
        url:
          type: string
          format: uri
          description: URL to monitor for changes
          maxLength: 5000
        title:
-          type: string
+          type: [string, 'null']
          description: Custom title for the web page change monitor (watch), not to be confused with page_title
          maxLength: 5000
        tag:
@@ -156,56 +165,61 @@ components:
          description: HTTP method to use
        fetch_backend:
          type: string
-          enum: [html_requests, html_webdriver]
+          description: |
-          description: Backend to use for fetching content
+            Backend to use for fetching content. Common values:
            - `system` (default) - Use the system-wide default fetcher
            - `html_requests` - Fast requests-based fetcher
            - `html_webdriver` - Browser-based fetcher (Playwright/Puppeteer)
            - `extra_browser_*` - Custom browser configurations (if configured)
            - Plugin-provided fetchers (if installed)
          pattern: '^(system|html_requests|html_webdriver|extra_browser_.+)$'
          default: system
        headers:
          type: object
          additionalProperties:
            type: string
          description: HTTP headers to include in requests
        body:
-          type: string
+          type: [string, 'null']
          description: HTTP request body
          maxLength: 5000
        proxy:
-          type: string
+          type: [string, 'null']
          description: Proxy configuration
          maxLength: 5000
        ignore_status_codes:
          type: [boolean, 'null']
          description: Ignore HTTP status code errors (boolean or null)
        webdriver_delay:
-          type: integer
+          type: [integer, 'null']
          description: Delay in seconds for webdriver
        webdriver_js_execute_code:
-          type: string
+          type: [string, 'null']
          description: JavaScript code to execute
          maxLength: 5000
        time_between_check:
          type: object
          properties:
            weeks:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 52000
              nullable: true
            days:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 365000
              nullable: true
            hours:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 8760000
              nullable: true
            minutes:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 525600000
              nullable: true
            seconds:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 31536000000
              nullable: true
          description: Time intervals between checks. All fields must be non-negative. At least one non-zero value required when not using default settings.
        time_between_check_use_default:
          type: boolean
@@ -219,11 +233,11 @@ components:
          maxItems: 100
          description: Notification URLs for this web page change monitor (watch). Maximum 100 URLs.
        notification_title:
-          type: string
+          type: [string, 'null']
          description: Custom notification title
          maxLength: 5000
        notification_body:
-          type: string
+          type: [string, 'null']
          description: Custom notification body
          maxLength: 5000
        notification_format:
@@ -231,7 +245,7 @@ components:
          enum: ['text', 'html', 'htmlcolor', 'markdown', 'System default']
          description: Format for notifications
        track_ldjson_price_data:
-          type: boolean
+          type: [boolean, 'null']
          description: Whether to track JSON-LD price data
        browser_steps:
          type: array
@@ -239,17 +253,14 @@ components:
            type: object
            properties:
              operation:
-                type: string
+                type: [string, 'null']
                maxLength: 5000
                nullable: true
              selector:
-                type: string
+                type: [string, 'null']
                maxLength: 5000
                nullable: true
              optional_value:
-                type: string
+                type: [string, 'null']
                maxLength: 5000
                nullable: true
            required: [operation, selector, optional_value]
            additionalProperties: false
          maxItems: 100
@@ -260,16 +271,197 @@ components:
          default: text_json_diff
          description: Optional processor mode to use for change detection. Defaults to `text_json_diff` if not specified.
        # Content Filtering
        include_filters:
          type: array
          items:
            type: string
            maxLength: 5000
          maxItems: 100
          description: CSS/XPath selectors to extract specific content from the page
        subtractive_selectors:
          type: array
          items:
            type: string
            maxLength: 5000
          maxItems: 100
          description: CSS/XPath selectors to remove content from the page
        ignore_text:
          type: array
          items:
            type: string
            maxLength: 5000
          maxItems: 100
          description: Text patterns to ignore in change detection
        trigger_text:
          type: array
          items:
            type: string
            maxLength: 5000
          maxItems: 100
          description: Text/regex patterns that must be present to trigger a change
        text_should_not_be_present:
          type: array
          items:
            type: string
            maxLength: 5000
          maxItems: 100
          description: Text that should NOT be present (triggers alert if found)
        extract_text:
          type: array
          items:
            type: string
            maxLength: 5000
          maxItems: 100
          description: Regex patterns to extract specific text after filtering
        # Text Processing
        trim_text_whitespace:
          type: boolean
          default: false
          description: Strip leading/trailing whitespace from text
        sort_text_alphabetically:
          type: boolean
          default: false
          description: Sort lines alphabetically before comparison
        remove_duplicate_lines:
          type: boolean
          default: false
          description: Remove duplicate lines from content
        check_unique_lines:
          type: boolean
          default: false
          description: Compare against all history for unique lines
        strip_ignored_lines:
          type: [boolean, 'null']
          description: Remove lines matching ignore patterns
        # Change Detection Filters
        filter_text_added:
          type: boolean
          default: true
          description: Include added text in change detection
        filter_text_removed:
          type: boolean
          default: true
          description: Include removed text in change detection
        filter_text_replaced:
          type: boolean
          default: true
          description: Include replaced text in change detection
        # Restock/Price Detection
        in_stock_only:
          type: boolean
          default: true
          description: Only trigger on in-stock transitions (restock_diff processor)
        follow_price_changes:
          type: boolean
          default: true
          description: Monitor and track price changes (restock_diff processor)
        price_change_threshold_percent:
          type: [number, 'null']
          description: Minimum price change percentage to trigger notification
        has_ldjson_price_data:
          type: [boolean, 'null']
          description: Whether page has LD-JSON price data (auto-detected)
          readOnly: true
        # Notifications
        notification_screenshot:
          type: boolean
          default: false
          description: Include screenshot in notifications (if supported by notification URL)
        filter_failure_notification_send:
          type: boolean
          default: true
          description: Send notification when filters fail to match content
        # History & Display
        use_page_title_in_list:
          type: [boolean, 'null']
          description: Display page title in watch list (null = use system default)
        history_snapshot_max_length:
          type: [integer, 'null']
          minimum: 1
          maximum: 1000
          description: Maximum number of history snapshots to keep (null = use system default)
        # Scheduling
        time_schedule_limit:
          type: object
          description: Weekly schedule limiting when checks can run
          properties:
            enabled:
              type: boolean
              default: false
            monday:
              $ref: '#/components/schemas/DaySchedule'
            tuesday:
              $ref: '#/components/schemas/DaySchedule'
            wednesday:
              $ref: '#/components/schemas/DaySchedule'
            thursday:
              $ref: '#/components/schemas/DaySchedule'
            friday:
              $ref: '#/components/schemas/DaySchedule'
            saturday:
              $ref: '#/components/schemas/DaySchedule'
            sunday:
              $ref: '#/components/schemas/DaySchedule'
        # Conditions (advanced logic)
        conditions:
          type: array
          items:
            type: object
            properties:
              field:
                type: string
                description: Field to check (e.g., 'page_filtered_text', 'page_title')
              operator:
                type: string
                description: Comparison operator (e.g., 'contains_regex', 'equals', 'not_equals')
              value:
                type: string
                description: Value to compare against
            required: [field, operator, value]
          maxItems: 100
          description: Array of condition rules for change detection logic (empty array when not set)
        conditions_match_logic:
          type: string
          enum: ['ALL', 'ANY']
          default: 'ALL'
          description: Logic operator - ALL (match all conditions) or ANY (match any condition)
    # Schedule entry for a single weekday; referenced via $ref by each of the
    # monday..sunday keys of `time_schedule_limit`.
    DaySchedule:
      type: object
      properties:
        enabled:
          type: boolean
          default: true
        start_time:
          type: string
          # Hours 0-23 with an optional leading digit (so both `9:30` and `09:30` match),
          # followed by minutes 00-59.
          pattern: '^([0-1]?[0-9]|2[0-3]):[0-5][0-9]$'
          default: '00:00'
          description: Start time in HH:MM format
        duration:
          type: object
          properties:
            # Both duration parts are digit-only *strings*, not integers; the defaults
            # are quoted so they stay strings. The patterns set no upper bound.
            hours:
              type: string
              pattern: '^[0-9]+$'
              default: '24'
            minutes:
              type: string
              pattern: '^[0-9]+$'
              default: '00'
    Watch:
      allOf:
        - $ref: '#/components/schemas/WatchBase'
        - type: object
          properties:
            uuid:
              type: string
              format: uuid
              description: Unique identifier for the web page change monitor (watch)
              readOnly: true
            last_checked:
              type: integer
              description: Unix timestamp of last check
@@ -278,9 +470,10 @@ components:
              type: integer
              description: Unix timestamp of last change
              readOnly: true
              x-computed: true
            last_error:
-              type: string
+              type: [string, boolean, 'null']
-              description: Last error message
+              description: Last error message (false when no error, string when error occurred, null if not checked yet)
              readOnly: true
            last_viewed:
              type: integer
@@ -291,6 +484,61 @@ components:
              format: string
              description: The watch URL rendered in case of any Jinja2 markup, always use this for listing.
              readOnly: true
              x-computed: true
            page_title:
              type: [string, 'null']
              description: HTML <title> tag extracted from the page
              readOnly: true
            check_count:
              type: integer
              description: Total number of checks performed
              readOnly: true
            fetch_time:
              type: number
              description: Duration of last fetch in seconds
              readOnly: true
            previous_md5:
              type: [string, boolean]
              description: MD5 hash of previous content (false if not set)
              readOnly: true
            previous_md5_before_filters:
              type: [string, boolean]
              description: MD5 hash before filters applied (false if not set)
              readOnly: true
            consecutive_filter_failures:
              type: integer
              description: Counter for consecutive filter match failures
              readOnly: true
            last_notification_error:
              type: [string, 'null']
              description: Last notification error message
              readOnly: true
            notification_alert_count:
              type: integer
              description: Number of notifications sent
              readOnly: true
            content-type:
              type: [string, 'null']
              description: Content-Type from last fetch
              readOnly: true
            remote_server_reply:
              type: [string, 'null']
              description: Server header from last response
              readOnly: true
            browser_steps_last_error_step:
              type: [integer, 'null']
              description: Last browser step that caused an error
              readOnly: true
            viewed:
              type: [integer, boolean]
              description: Computed property - true if watch has been viewed, false otherwise (deprecated, use last_viewed instead)
              readOnly: true
              x-computed: true
            history_n:
              type: integer
              description: Number of history snapshots available
              readOnly: true
              x-computed: true
    CreateWatch:
      allOf:
@@ -301,34 +549,45 @@ components:
    UpdateWatch:
      allOf:
-        - $ref: '#/components/schemas/WatchBase'
+        - $ref: '#/components/schemas/WatchBase'  # Extends WatchBase for user-settable fields
        - type: object
          properties:
            last_viewed:
              type: integer
              description: Unix timestamp in seconds of the last time the watch was viewed. Setting it to a value higher than `last_changed` in the "Update watch" endpoint marks the watch as viewed.
              minimum: 0
      # Note: ReadOnly and @property fields are filtered out in the backend before update
      # We don't use unevaluatedProperties:false here to allow roundtrip GET/PUT workflows
      # where the response includes computed fields that should be silently ignored
    Tag:
-      type: object
+      allOf:
-      properties:
+        - $ref: '#/components/schemas/WatchBase'
-        uuid:
+        - type: object
-          type: string
+          properties:
-          format: uuid
+            overrides_watch:
-          description: Unique identifier for the tag
+              type: [boolean, 'null']
-          readOnly: true
+              description: |
-        title:
+                Whether this tag's settings override watch settings for all watches in this tag/group.
-          type: string
+                - true: Tag settings override watch settings
-          description: Tag title
+                - false: Tag settings do not override (watches use their own settings)
-          maxLength: 5000
+                - null: Not decided yet / inherit default behavior
-        notification_urls:
+            # Future: Aggregated statistics from all watches with this tag
-          type: array
+            # check_count:
-          items:
+            #   type: integer
-            type: string
+            #   description: Sum of check_count from all watches with this tag
-          description: Default notification URLs for web page change monitors (watches) with this tag
+            #   readOnly: true
-        notification_muted:
+            #   x-computed: true
-          type: boolean
+            # last_checked:
-          description: Whether notifications are muted for this tag
+            #   type: integer
            #   description: Most recent last_checked timestamp from all watches with this tag
            #   readOnly: true
            #   x-computed: true
            # last_changed:
            #   type: integer
            #   description: Most recent last_changed timestamp from all watches with this tag
            #   readOnly: true
            #   x-computed: true
    CreateTag:
      allOf:
--- a/docs/api_v1/index.html
+++ b/docs/api_v1/index.html
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,6 @@ flask-compress
 # 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers)
 flask-login>=0.6.3
 flask-paginate
 flask_expects_json~=1.7
 flask_restful
 flask_cors # For the Chrome extension to operate
 # janus # No longer needed - using pure threading.Queue for multi-loop support
@@ -126,8 +125,8 @@ greenlet >= 3.0.3
 # Default SOCKETIO_MODE=threading is recommended for better compatibility
 gevent
-# Pinned or it causes problems with flask_expects_json which seems unmaintained
+# Previously pinned for flask_expects_json (removed 2026-02). Unpinning for now.
-referencing==0.35.1
+referencing
 # For conditions
 panzi-json-logic
Author	SHA1	Message	Date
dgtlmoon	fffcc9af39	WIP Some checks failed Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-13 15:44:54 +01:00
dgtlmoon	961901c594	WIP	2026-02-13 15:15:34 +01:00
dgtlmoon	340421ea36	Minor cache	2026-02-13 14:58:23 +01:00
dgtlmoon	f29c4c8f5f	WIP	2026-02-13 14:54:34 +01:00
dgtlmoon	9702b6c8a1	Tweak message	2026-02-13 14:52:34 +01:00
dgtlmoon	798fc21f1c	WIP	2026-02-13 14:50:23 +01:00
dgtlmoon	0c6931c07c	WIP	2026-02-13 14:40:43 +01:00
dgtlmoon	60ed2a26ea	WIP	2026-02-13 14:28:56 +01:00
dgtlmoon	490ca0a663	WIP	2026-02-13 11:41:55 +01:00
dgtlmoon	10c9df288a	WIP	2026-02-13 11:24:17 +01:00
dgtlmoon	f54725d292	Increase test coverage	2026-02-13 09:18:28 +01:00
dgtlmoon	acf9e4a1e6	Remove flask_expects_json	2026-02-13 09:10:31 +01:00
dgtlmoon	7ddc0f9be0	Sync API Spec with base model	2026-02-13 09:10:04 +01:00
dgtlmoon	20f11c5c4a	Improve error logging	2026-02-13 08:49:09 +01:00
dgtlmoon	4bc01aca8d	Price tracker - Use a more memory efficient price scraper, use subprocess on linux for cleaner memory management. (#3864 ) Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details CodeQL / Analyze (javascript) (push) Has been cancelled Details CodeQL / Analyze (python) (push) Has been cancelled Details	2026-02-11 17:21:08 +01:00
dgtlmoon	ef41dd304c	Refactoring upgrade path (#3861 )	2026-02-11 16:13:08 +01:00