WIP

Minor cache
2026-06-16 13:51:44 +00:00 · 2026-02-13 15:44:54 +01:00 · 2026-02-13 15:15:34 +01:00 · 2026-02-13 14:58:23 +01:00 · 2026-02-13 14:54:34 +01:00 · 2026-02-13 14:52:34 +01:00
24 changed files with 2564 additions and 630 deletions
@@ -103,7 +103,7 @@ jobs:
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
-          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
+          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'         

  # Basic pytest tests with ancillary services
  basic-tests:
@@ -516,3 +516,142 @@ jobs:
            exit 1
          fi
          docker rm sig-test
+
+  # Upgrade path test
+  upgrade-path-test:
+    runs-on: ubuntu-latest
+    needs: build
+    timeout-minutes: 25
+    env:
+      PYTHON_VERSION: ${{ inputs.python-version }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # Fetch all history and tags for upgrade testing
+
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Check upgrade works without error
+        run: |
+          # Creates a datastore with 0.49.1, then loads it with the commit under test and checks
+          # that the tag and watch created by the old version are still visible in the UI.
+          #
+          # The branch/tag name is read from the runner-provided GITHUB_REF_NAME variable rather
+          # than being templated into this script: ref names are user-controlled, and templated
+          # text is pasted into the script before bash parses it (script injection).
+          echo "=== Testing upgrade path from 0.49.1 to ${GITHUB_REF_NAME} (${{ github.sha }}) ==="
+
+          # Checkout old version and create datastore
+          git checkout 0.49.1
+          python3 -m venv .venv
+          source .venv/bin/activate
+          pip install -r requirements.txt
+          pip install 'pyOpenSSL>=23.2.0'
+
+          echo "=== Running version 0.49.1 to create datastore ==="
+          python3 ./changedetection.py -C -d /tmp/data &
+          APP_PID=$!
+
+          # Wait for app to be ready
+          echo "Waiting for 0.49.1 to be ready..."
+          sleep 6
+
+          # Extract API key from datastore (0.49.1 uses url-watches.json)
+          API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json)
+          echo "API Key: ${API_KEY:0:8}..."
+
+          # Create a watch with tag "github-group-test" via API
+          echo "Creating test watch with tag via API..."
+          curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
+            -H "x-api-key: ${API_KEY}" \
+            -H "Content-Type: application/json" \
+            --show-error --fail \
+            --retry 6 --retry-delay 1 --retry-connrefused \
+            -d '{
+              "url": "https://example.com/upgrade-test",
+              "tag": "github-group-test"
+            }'
+
+          echo "✓ Created watch with tag 'github-group-test'"
+
+          # Create a specific test URL watch
+          echo "Creating test URL watch via API..."
+          curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
+            -H "x-api-key: ${API_KEY}" \
+            -H "Content-Type: application/json" \
+            --show-error --fail \
+            -d '{
+              "url": "http://localhost/test.txt"
+            }'
+
+          echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1"
+
+          # Stop the old version gracefully
+          kill $APP_PID
+          wait $APP_PID || true
+          echo "✓ Version 0.49.1 stopped"
+
+          # Upgrade to current version (use commit SHA since we're in detached HEAD)
+          echo "Upgrading to commit ${{ github.sha }}"
+          git checkout ${{ github.sha }}
+          pip install -r requirements.txt
+
+          echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
+          TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
+
+          echo "=== Upgrade test output ==="
+          cat /tmp/upgrade-test.log
+          echo "✓ Datastore upgraded successfully"
+
+          # Now start the current version normally to verify the tag survived
+          echo "=== Starting current version to verify tag exists after upgrade ==="
+          timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
+          APP_PID=$!
+
+          # Wait for app to be ready and fetch UI
+          echo "Waiting for current version to be ready..."
+          sleep 5
+          curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html
+
+          # Verify tag exists in UI
+          if grep -q "github-group-test" /tmp/ui-output.html; then
+            echo "✓ Tag 'github-group-test' found in UI after upgrade"
+          else
+            echo "ERROR: Tag 'github-group-test' not found in UI after upgrade"
+            echo "=== UI Output ==="
+            cat /tmp/ui-output.html
+            echo "=== App Log ==="
+            cat /tmp/ui-test.log
+            kill $APP_PID || true
+            exit 1
+          fi
+
+          # Verify test URL exists in UI
+          if grep -q "http://localhost/test.txt" /tmp/ui-output.html; then
+            echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade"
+          else
+            echo "ERROR: Watch URL 'http://localhost/test.txt' not found in UI after upgrade"
+            echo "=== UI Output ==="
+            cat /tmp/ui-output.html
+            echo "=== App Log ==="
+            cat /tmp/ui-test.log
+            kill $APP_PID || true
+            exit 1
+          fi
+
+          # Cleanup
+          kill $APP_PID || true
+          wait $APP_PID || true
+
+          echo ""
+          echo "✓✓✓ Upgrade test passed: 0.49.1 → ${GITHUB_REF_NAME} ✓✓✓"
+          echo "    - Commit: ${{ github.sha }}"
+          echo "    - Datastore migrated successfully"
+          echo "    - Tag 'github-group-test' survived upgrade"
+          echo "    - Watch URL 'http://localhost/test.txt' survived upgrade"
+
+          echo "✓ Upgrade test passed: 0.49.1 → ${GITHUB_REF_NAME}"
+
+      # Runs even when the step above failed so both application logs can be inspected afterwards
+      - name: Upload upgrade test logs
+        if: always()
+        uses: actions/upload-artifact@v6
+        with:
+          name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
+          path: |
+            /tmp/upgrade-test.log
+            /tmp/ui-test.log
@@ -371,7 +371,15 @@ def main():
        # Dont' start if the JSON DB looks corrupt
        logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
        logger.critical(str(e))
-        return
+        sys.exit(1)
+
+    # Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests)
+    if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'):
+        logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}")
+        logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}")
+        logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches")
+        logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)")
+        sys.exit(0)

    # Apply all_paused setting if specified via CLI
    if all_paused is not None:
@@ -4,6 +4,10 @@ from flask import request
 from functools import wraps
 from . import auth, validate_openapi_request
 from ..validate_url import is_safe_valid_url
+import json
+
+# Number of URLs above which import switches to background processing
+IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD = 20


 def default_content_type(content_type='text/plain'):
@@ -19,6 +23,76 @@ def default_content_type(content_type='text/plain'):
    return decorator


+def convert_query_param_to_type(value, schema_property):
+    """
+    Convert a query parameter string to the appropriate type based on schema definition.
+
+    Args:
+        value: String value from query parameter
+        schema_property: Schema property definition with 'type' or 'anyOf' field
+
+    Returns:
+        Converted value in the appropriate type. Values whose type is 'string',
+        unknown or not declared are returned unchanged.
+
+    Raises:
+        ValueError: If the value cannot be converted - e.g. an 'object' field that is not
+            a JSON object, or a non-numeric 'integer'/'number' value.
+
+    Supports both OpenAPI 3.1 formats:
+    - type: [string, 'null']  (array format)
+    - anyOf: [{type: string}, {type: null}]  (anyOf format)
+    """
+    prop_type = schema_property.get('type')
+
+    # Handle OpenAPI 3.1 type arrays: type: [string, 'null']
+    if isinstance(prop_type, list):
+        # Use the first non-null type from the array
+        prop_type = next((t for t in prop_type if t != 'null'), None)
+
+    # Handle anyOf schemas (older format)
+    elif 'anyOf' in schema_property:
+        # Use the first non-null type from anyOf
+        prop_type = next(
+            (option.get('type') for option in schema_property['anyOf']
+             if option.get('type') and option.get('type') != 'null'),
+            None,
+        )
+
+    # Handle array type (e.g., notification_urls)
+    if prop_type == 'array':
+        # Support both comma-separated and JSON array format
+        if value.startswith('['):
+            try:
+                return json.loads(value)
+            except json.JSONDecodeError:
+                # Looks like JSON but is not - fall through to comma-separated parsing
+                pass
+        return [v.strip() for v in value.split(',')]
+
+    # Handle object type (e.g., time_between_check, headers)
+    if prop_type == 'object':
+        try:
+            parsed = json.loads(value)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON object for field: {value}") from e
+        # Valid JSON is not enough: '5' or '[1]' parse fine but are not objects
+        if not isinstance(parsed, dict):
+            raise ValueError(f"Invalid JSON object for field: {value}")
+        return parsed
+
+    # Handle boolean type
+    if prop_type == 'boolean':
+        # bool() guarantees a real boolean even if strtobool() hands back 1/0
+        # NOTE(review): strtobool's exact return type is not visible here - confirm
+        return bool(strtobool(value))
+
+    # Handle integer type
+    if prop_type == 'integer':
+        return int(value)
+
+    # Handle number type (float)
+    if prop_type == 'number':
+        return float(value)
+
+    # Default: return as string
+    return value
+
+
 class Import(Resource):
    def __init__(self, **kwargs):
        # datastore is a black box dependency
@@ -28,40 +102,128 @@ class Import(Resource):
    @default_content_type('text/plain') #3547 #3542
    @validate_openapi_request('importWatches')
    def post(self):
-        """Import a list of watched URLs."""
+        """Import a list of watched URLs with optional watch configuration.
+
+        The request body holds one URL per line; blank lines are ignored.
+
+        Query parameters:
+            proxy: must be one of the datastore's configured proxies.
+            dedupe: truthy string (default 'true'); skips URLs that already exist.
+            tag / tag_uuids: passed through to add_watch (tag_uuids is comma-separated).
+            anything else: must be a property of the watch schema; it is converted to the
+                schema's type and applied to every imported watch.
+
+        Returns:
+            (list of new watch UUIDs, 200) when fewer than
+            IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD URLs are imported,
+            (status dict, 202) when the import continues in a background thread,
+            (error message, 400) for an invalid parameter or URL - nothing is imported then.
+        """
+        from . import get_watch_schema_properties
+        # Special parameters that are NOT watch configuration
+        special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}

        extras = {}

+        # Handle special 'proxy' parameter
        if request.args.get('proxy'):
            plist = self.datastore.proxy_list
            if not request.args.get('proxy') in plist:
-                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
+                proxy_list_str = ', '.join(plist) if plist else 'none configured'
+                return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
            else:
                extras['proxy'] = request.args.get('proxy')

+        # Handle special 'dedupe' parameter
        dedupe = strtobool(request.args.get('dedupe', 'true'))

+        # Handle special 'tag' and 'tag_uuids' parameters
        tags = request.args.get('tag')
        tag_uuids = request.args.get('tag_uuids')

        if tag_uuids:
            tag_uuids = tag_uuids.split(',')

+        # Extract ALL other query parameters as watch configuration
+        # Get schema from OpenAPI spec (replaces old schema_create_watch)
+        schema_properties = get_watch_schema_properties()
+        for param_name, param_value in request.args.items():
+            # Skip special parameters
+            if param_name in special_params:
+                continue
+
+            # Reject parameters that are not in the schema (unknown parameter)
+            if param_name not in schema_properties:
+                return f"Unknown watch configuration parameter: {param_name}", 400
+
+            # Convert to appropriate type based on schema
+            try:
+                converted_value = convert_query_param_to_type(param_value, schema_properties[param_name])
+                extras[param_name] = converted_value
+            except (ValueError, json.JSONDecodeError) as e:
+                return f"Invalid value for parameter '{param_name}': {str(e)}", 400
+
+        # Validate processor if provided
+        if 'processor' in extras:
+            from changedetectionio.processors import available_processors
+            available = [p[0] for p in available_processors()]
+            if extras['processor'] not in available:
+                return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
+
+        # Validate fetch_backend if provided
+        if 'fetch_backend' in extras:
+            from changedetectionio.content_fetchers import available_fetchers
+            available = [f[0] for f in available_fetchers()]
+            # Also allow 'system' and extra_browser_* patterns
+            is_valid = (
+                extras['fetch_backend'] == 'system' or
+                extras['fetch_backend'] in available or
+                extras['fetch_backend'].startswith('extra_browser_')
+            )
+            if not is_valid:
+                return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
+
+        # Validate notification_urls if provided
+        if 'notification_urls' in extras:
+            from wtforms import ValidationError
+            from changedetectionio.api.Notifications import validate_notification_urls
+            try:
+                validate_notification_urls(extras['notification_urls'])
+            except ValidationError as e:
+                return f"Invalid notification_urls: {str(e)}", 400
+
        urls = request.get_data().decode('utf8').splitlines()
-        added = []
+        # Clean and validate URLs upfront
+        urls_to_import = []
+        # URLs already accepted from this request. Nothing is written to the datastore until
+        # validation has finished, so url_exists() alone cannot spot a URL repeated in the body.
+        # NOTE(review): exact string match; url_exists() may compare more loosely - confirm
+        urls_seen_in_request = set()
        for url in urls:
            url = url.strip()
            if not len(url):
                continue

-            # If hosts that only contain alphanumerics are allowed ("localhost" for example)
+            # Validate URL
            if not is_safe_valid_url(url):
                return f"Invalid or unsupported URL - {url}", 400

+            # Check for duplicates if dedupe is enabled
+            if dedupe and url in urls_seen_in_request:
+                continue
            if dedupe and self.datastore.url_exists(url):
                continue

-            new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
-            added.append(new_uuid)
+            urls_seen_in_request.add(url)
+            urls_to_import.append(url)

-        return added
+        # For small imports, process synchronously for immediate feedback
+        if len(urls_to_import) < IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD:
+            added = []
+            for url in urls_to_import:
+                new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
+                added.append(new_uuid)
+            return added, 200
+
+        # For large imports (>= IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD), process in background thread
+        else:
+            import threading
+            from loguru import logger
+
+            def import_watches_background():
+                """Background thread to import watches - discarded after completion."""
+                try:
+                    added_count = 0
+                    for url in urls_to_import:
+                        try:
+                            self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
+                            added_count += 1
+                        except Exception as e:
+                            logger.error(f"Error importing URL {url}: {e}")
+
+                    logger.info(f"Background import complete: {added_count} watches created")
+                except Exception as e:
+                    logger.error(f"Error in background import: {e}")
+
+            # Start background thread and return immediately
+            thread = threading.Thread(target=import_watches_background, daemon=True, name="ImportWatches-Background")
+            thread.start()
+
+            return {'status': f'Importing {len(urls_to_import)} URLs in background', 'count': len(urls_to_import)}, 202
@@ -1,8 +1,6 @@
-from flask_expects_json import expects_json
 from flask_restful import Resource, abort
 from flask import request
 from . import auth, validate_openapi_request
-from . import schema_create_notification_urls, schema_delete_notification_urls

 class Notifications(Resource):
    def __init__(self, **kwargs):
@@ -22,7 +20,6 @@ class Notifications(Resource):
    
    @auth.check_token
    @validate_openapi_request('addNotifications')
-    @expects_json(schema_create_notification_urls)
    def post(self):
        """Create Notification URLs."""

@@ -50,7 +47,6 @@ class Notifications(Resource):
    
    @auth.check_token
    @validate_openapi_request('replaceNotifications')
-    @expects_json(schema_create_notification_urls)
    def put(self):
        """Replace Notification URLs."""
        json_data = request.get_json()
@@ -73,7 +69,6 @@ class Notifications(Resource):
        
    @auth.check_token
    @validate_openapi_request('deleteNotifications')
-    @expects_json(schema_delete_notification_urls)
    def delete(self):
        """Delete Notification URLs."""

@@ -1,6 +1,5 @@
 from changedetectionio import queuedWatchMetaData
 from changedetectionio import worker_pool
-from flask_expects_json import expects_json
 from flask_restful import abort, Resource
 from loguru import logger

@@ -8,8 +7,7 @@ import threading
 from flask import request
 from . import auth

-# Import schemas from __init__.py
-from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request
+from . import validate_openapi_request


 class Tag(Resource):
@@ -69,7 +67,25 @@ class Tag(Resource):
            tag.commit()
            return "OK", 200

-        return tag
+        # Filter out Watch-specific runtime fields that don't apply to Tags (yet)
+        # TODO: Future enhancement - aggregate these values from all Watches that have this tag:
+        #   - check_count: sum of all watches' check_count
+        #   - last_checked: most recent last_checked from all watches
+        #   - last_changed: most recent last_changed from all watches
+        #   - consecutive_filter_failures: count of watches with failures
+        #   - etc.
+        # These come from watch_base inheritance but currently have no meaningful value for Tags
+        watch_only_fields = {
+            'browser_steps_last_error_step', 'check_count', 'consecutive_filter_failures',
+            'content-type', 'fetch_time', 'last_changed', 'last_checked', 'last_error',
+            'last_notification_error', 'last_viewed', 'notification_alert_count',
+            'page_title', 'previous_md5', 'previous_md5_before_filters', 'remote_server_reply'
+        }
+
+        # Create clean tag dict without Watch-specific fields
+        clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}
+
+        return clean_tag

    @auth.check_token
    @validate_openapi_request('deleteTag')
@@ -102,24 +118,46 @@ class Tag(Resource):

    @auth.check_token
    @validate_openapi_request('updateTag')
-    @expects_json(schema_update_tag)
    def put(self, uuid):
        """Update tag information."""
+        # Flow: 404 when the tag is unknown -> validate notification_urls -> drop readOnly
+        # fields -> 400 when any unknown field remains -> update the tag and commit it.
        tag = self.datastore.data['settings']['application']['tags'].get(uuid)
        if not tag:
            abort(404, message='No tag exists with the UUID of {}'.format(uuid))

+        # Make a mutable copy of request.json for modification
+        json_data = dict(request.json)
+
        # Validate notification_urls if provided
-        if 'notification_urls' in request.json:
+        if 'notification_urls' in json_data:
            from wtforms import ValidationError
            from changedetectionio.api.Notifications import validate_notification_urls
            try:
-                notification_urls = request.json.get('notification_urls', [])
+                notification_urls = json_data.get('notification_urls', [])
                validate_notification_urls(notification_urls)
            except ValidationError as e:
                return str(e), 400

-        tag.update(request.json)
+        # Filter out readOnly fields (extracted from OpenAPI spec Tag schema)
+        # These are system-managed fields that should never be user-settable
+        from . import get_readonly_tag_fields
+        readonly_fields = get_readonly_tag_fields()
+
+        # Tag model inherits from watch_base but has no @property attributes of its own
+        # So we only need to filter readOnly fields
+        # readOnly fields are removed silently (no error), unlike unknown fields below
+        for field in readonly_fields:
+            json_data.pop(field, None)
+
+        # Validate remaining fields - reject truly unknown fields
+        # Get valid fields from Tag schema
+        from . import get_tag_schema_properties
+        valid_fields = set(get_tag_schema_properties().keys())
+
+        # Check for unknown fields
+        unknown_fields = set(json_data.keys()) - valid_fields
+        if unknown_fields:
+            # Sorted so the error message is stable regardless of set ordering
+            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
+
+        tag.update(json_data)
        tag.commit()

        return "OK", 200
@@ -127,13 +165,21 @@ class Tag(Resource):

    @auth.check_token
    @validate_openapi_request('createTag')
-    # Only cares for {'title': 'xxxx'}
    def post(self):
        """Create a single tag/group."""

        json_data = request.get_json()
        title = json_data.get("title",'').strip()

+        # Validate that only valid fields are provided
+        # Get valid fields from Tag schema
+        from . import get_tag_schema_properties
+        valid_fields = set(get_tag_schema_properties().keys())
+
+        # Check for unknown fields
+        unknown_fields = set(json_data.keys()) - valid_fields
+        if unknown_fields:
+            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400

        new_uuid = self.datastore.add_tag(title=title)
        if new_uuid:
@@ -8,13 +8,11 @@ from . import auth
 from changedetectionio import queuedWatchMetaData, strtobool
 from changedetectionio import worker_pool
 from flask import request, make_response, send_from_directory
-from flask_expects_json import expects_json
 from flask_restful import abort, Resource
 from loguru import logger
 import copy

-# Import schemas from __init__.py
-from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
+from . import validate_openapi_request, get_readonly_watch_fields
 from ..notification import valid_notification_formats
 from ..notification.handler import newline_re

@@ -121,7 +119,6 @@ class Watch(Resource):

    @auth.check_token
    @validate_openapi_request('updateWatch')
-    @expects_json(schema_update_watch)
    def put(self, uuid):
        """Update watch information."""
        watch = self.datastore.data['watching'].get(uuid)
@@ -175,6 +172,35 @@ class Watch(Resource):
        # Extract and remove processor config fields from json_data
        processor_config_data = processors.extract_processor_config_from_form_data(json_data)

+        # Filter out readOnly fields (extracted from OpenAPI spec Watch schema)
+        # These are system-managed fields that should never be user-settable
+        readonly_fields = get_readonly_watch_fields()
+
+        # Also filter out @property attributes (computed/derived values from the model)
+        # These are not stored and should be ignored in PUT requests
+        from changedetectionio.model.Watch import model as WatchModel
+        property_fields = WatchModel.get_property_names()
+
+        # Combine both sets of fields to ignore
+        fields_to_ignore = readonly_fields | property_fields
+
+        # Remove all ignored fields from update data
+        for field in fields_to_ignore:
+            json_data.pop(field, None)
+
+        # Validate remaining fields - reject truly unknown fields
+        # Get valid fields from WatchBase schema
+        from . import get_watch_schema_properties
+        valid_fields = set(get_watch_schema_properties().keys())
+
+        # Also allow last_viewed (explicitly defined in UpdateWatch schema)
+        valid_fields.add('last_viewed')
+
+        # Check for unknown fields
+        unknown_fields = set(json_data.keys()) - valid_fields
+        if unknown_fields:
+            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
+
        # Update watch with regular (non-processor-config) fields
        watch.update(json_data)
        watch.commit()
@@ -393,7 +419,6 @@ class CreateWatch(Resource):

    @auth.check_token
    @validate_openapi_request('createWatch')
-    @expects_json(schema_create_watch)
    def post(self):
        """Create a single watch."""

@@ -481,6 +506,7 @@ class CreateWatch(Resource):
                'last_error': watch['last_error'],
                'link': watch.link,
                'page_title': watch['page_title'],
+                'tags': [*tags],  # Unpack dict keys to list (can't use list() since variable named 'list')
                'title': watch['title'],
                'url': watch['url'],
                'viewed': watch.viewed
@@ -1,41 +1,6 @@
-import copy
 import functools
 from flask import request, abort
 from loguru import logger
-from . import api_schema
-from ..model import watch_base
-
-# Build a JSON Schema atleast partially based on our Watch model
-watch_base_config = watch_base()
-schema = api_schema.build_watch_json_schema(watch_base_config)
-
-schema_create_watch = copy.deepcopy(schema)
-schema_create_watch['required'] = ['url']
-del schema_create_watch['properties']['last_viewed']
-# Allow processor_config_* fields (handled separately in endpoint)
-schema_create_watch['patternProperties'] = {
-    '^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
-}
-
-schema_update_watch = copy.deepcopy(schema)
-schema_update_watch['additionalProperties'] = False
-# Allow processor_config_* fields (handled separately in endpoint)
-schema_update_watch['patternProperties'] = {
-    '^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
-}
-
-# Tag schema is also based on watch_base since Tag inherits from it
-schema_tag = copy.deepcopy(schema)
-schema_create_tag = copy.deepcopy(schema_tag)
-schema_create_tag['required'] = ['title']
-schema_update_tag = copy.deepcopy(schema_tag)
-schema_update_tag['additionalProperties'] = False
-
-schema_notification_urls = copy.deepcopy(schema)
-schema_create_notification_urls = copy.deepcopy(schema_notification_urls)
-schema_create_notification_urls['required'] = ['notification_urls']
-schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
-schema_delete_notification_urls['required'] = ['notification_urls']

@functools.cache
 def get_openapi_spec():
@@ -54,6 +19,134 @@ def get_openapi_spec():
    _openapi_spec = OpenAPI.from_dict(spec_dict)
    return _openapi_spec

+@functools.cache
+def get_openapi_schema_dict():
+    """
+    Load the OpenAPI spec file and return it as a plain dictionary.
+
+    The YAML document is returned exactly as parsed (not wrapped in an OpenAPI object),
+    so callers can look up schemas under 'components' directly. The result is cached,
+    which means the file is read once and every caller receives the same dict.
+    """
+    import os
+    import yaml
+
+    api_dir = os.path.dirname(__file__)
+    spec_path = os.path.join(api_dir, '../../docs/api-spec.yaml')
+    # Fall back to the second location when the first one does not exist
+    if not os.path.exists(spec_path):
+        spec_path = os.path.join(api_dir, '../docs/api-spec.yaml')
+
+    with open(spec_path, 'r', encoding='utf-8') as spec_file:
+        spec_dict = yaml.safe_load(spec_file)
+    return spec_dict
+
+@functools.cache
+def _resolve_schema_properties(schema_name):
+    """
+    Look up the properties of a named schema, following one level of allOf inheritance.
+
+    Args:
+        schema_name: Name of the schema (e.g., 'WatchBase', 'Watch', 'Tag')
+
+    Returns:
+        dict: Property name -> property definition. For an allOf schema this is a merge of
+        each entry's $ref target and inline properties, later entries overriding earlier
+        ones. An unknown schema name yields an empty dict.
+    """
+    all_schemas = get_openapi_schema_dict()['components']['schemas']
+    schema = all_schemas.get(schema_name, {})
+
+    # No inheritance: hand back the schema's own property mapping
+    if 'allOf' not in schema:
+        return schema.get('properties', {})
+
+    merged = {}
+    for part in schema['allOf']:
+        # A $ref names the parent schema by the last path segment
+        if '$ref' in part:
+            parent_name = part['$ref'].split('/')[-1]
+            parent_schema = all_schemas.get(parent_name, {})
+            merged.update(parent_schema.get('properties', {}))
+        # Properties declared inline on this allOf entry
+        if 'properties' in part:
+            merged.update(part['properties'])
+
+    return merged
+
+@functools.cache
+def _resolve_readonly_fields(schema_name):
+    """
+    Collect the names of all properties marked `readOnly: true` on a named schema,
+    following one level of allOf inheritance.
+
+    Args:
+        schema_name: Name of the schema (e.g., 'Watch', 'Tag')
+
+    Returns:
+        frozenset: readOnly field names from the schema itself or, for an allOf schema,
+        from each entry's $ref target and inline properties
+    """
+    all_schemas = get_openapi_schema_dict()['components']['schemas']
+    schema = all_schemas.get(schema_name, {})
+
+    # Gather every property mapping that contributes to this schema
+    property_maps = []
+    if 'allOf' in schema:
+        for part in schema['allOf']:
+            # A $ref names the parent schema by the last path segment
+            if '$ref' in part:
+                parent_schema = all_schemas.get(part['$ref'].split('/')[-1], {})
+                if 'properties' in parent_schema:
+                    property_maps.append(parent_schema['properties'])
+            # Properties declared inline on this allOf entry
+            if 'properties' in part:
+                property_maps.append(part['properties'])
+    elif 'properties' in schema:
+        property_maps.append(schema['properties'])
+
+    # Only an explicit boolean true counts as readOnly
+    return frozenset(
+        field_name
+        for props in property_maps
+        for field_name, field_def in props.items()
+        if field_def.get('readOnly') is True
+    )
+
+@functools.cache
+def get_watch_schema_properties():
+    """
+    Return the property definitions of the 'WatchBase' schema in the OpenAPI spec.
+
+    No filtering is applied: properties marked readOnly in the spec are included too.
+    The result is cached and shared between callers - treat it as read-only.
+    """
+    return _resolve_schema_properties('WatchBase')
+
+@functools.cache
+def get_readonly_watch_fields():
+    """
+    Return the names of the readOnly fields of the 'Watch' schema in the OpenAPI spec.
+
+    Returns a frozenset covering the schema's own readOnly properties and those of the
+    schemas it references via allOf (presumably WatchBase's uuid/date_created - verify
+    against api-spec.yaml).
+    """
+    return _resolve_readonly_fields('Watch')
+
+@functools.cache
+def get_tag_schema_properties():
+    """
+    Return the property definitions of the 'Tag' schema in the OpenAPI spec.
+
+    Includes properties inherited through allOf plus the Tag-specific ones (presumably
+    WatchBase + overrides_watch - verify against api-spec.yaml).
+    The result is cached and shared between callers - treat it as read-only.
+    """
+    return _resolve_schema_properties('Tag')
+
+@functools.cache
+def get_readonly_tag_fields():
+    """
+    Return the names of the readOnly fields of the 'Tag' schema in the OpenAPI spec.
+
+    Returns a frozenset covering the schema's own readOnly properties and those of the
+    schemas it references via allOf (presumably WatchBase's uuid/date_created - verify
+    against api-spec.yaml).
+    """
+    return _resolve_readonly_fields('Tag')
+
 def validate_openapi_request(operation_id):
    """Decorator to validate incoming requests against OpenAPI spec."""
    def decorator(f):
@@ -72,8 +165,16 @@ def validate_openapi_request(operation_id):
                    if result.errors:
                        error_details = []
                        for error in result.errors:
-                            error_details.append(str(error))
-                        raise BadRequest(f"OpenAPI validation failed: {error_details}")
+                            # Extract detailed schema errors from __cause__
+                            if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
+                                for schema_error in error.__cause__.schema_errors:
+                                    field = '.'.join(str(p) for p in schema_error.path) if schema_error.path else 'body'
+                                    msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
+                                    error_details.append(f"{field}: {msg}")
+                            else:
+                                error_details.append(str(error))
+                            logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
+                        raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
            except BadRequest:
                # Re-raise BadRequest exceptions (validation failures)
                raise
@@ -1,162 +0,0 @@
-# Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
-# Probably other ways to solve this when the backend switches to some ORM
-from changedetectionio.notification import valid_notification_formats
-
-
-def build_time_between_check_json_schema():
-    # Setup time between check schema
-    schema_properties_time_between_check = {
-        "type": "object",
-        "additionalProperties": False,
-        "properties": {}
-    }
-    for p in ['weeks', 'days', 'hours', 'minutes', 'seconds']:
-        schema_properties_time_between_check['properties'][p] = {
-            "anyOf": [
-                {
-                    "type": "integer"
-                },
-                {
-                    "type": "null"
-                }
-            ]
-        }
-
-    return schema_properties_time_between_check
-
-def build_watch_json_schema(d):
-    # Base JSON schema
-    schema = {
-        'type': 'object',
-        'properties': {},
-    }
-
-    for k, v in d.items():
-        # @todo 'integer' is not covered here because its almost always for internal usage
-
-        if isinstance(v, type(None)):
-            schema['properties'][k] = {
-                "anyOf": [
-                    {"type": "null"},
-                ]
-            }
-        elif isinstance(v, list):
-            schema['properties'][k] = {
-                "anyOf": [
-                    {"type": "array",
-                     # Always is an array of strings, like text or regex or something
-                     "items": {
-                         "type": "string",
-                         "maxLength": 5000
-                     }
-                     },
-                ]
-            }
-        elif isinstance(v, bool):
-            schema['properties'][k] = {
-                "anyOf": [
-                    {"type": "boolean"},
-                ]
-            }
-        elif isinstance(v, str):
-            schema['properties'][k] = {
-                "anyOf": [
-                    {"type": "string",
-                     "maxLength": 5000},
-                ]
-            }
-
-    # Can also be a string (or None by default above)
-    for v in ['body',
-              'notification_body',
-              'notification_format',
-              'notification_title',
-              'proxy',
-              'tag',
-              'title',
-              'webdriver_js_execute_code'
-              ]:
-        schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000})
-
-    for v in ['last_viewed']:
-        schema['properties'][v] = {
-            "type": "integer",
-            "description": "Unix timestamp in seconds of the last time the watch was viewed.",
-            "minimum": 0
-        }
-
-    # None or Boolean
-    schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})
-
-    schema['properties']['method'] = {"type": "string",
-                                      "enum": ["GET", "POST", "DELETE", "PUT"]
-                                      }
-
-    schema['properties']['fetch_backend']['anyOf'].append({"type": "string",
-                                                           "enum": ["html_requests", "html_webdriver"]
-                                                           })
-
-    schema['properties']['processor'] = {"anyOf": [
-        {"type": "string", "enum": ["restock_diff", "text_json_diff"]},
-        {"type": "null"}
-    ]}
-
-    # All headers must be key/value type dict
-    schema['properties']['headers'] = {
-        "type": "object",
-        "patternProperties": {
-            # Should always be a string:string type value
-            ".*": {"type": "string"},
-        }
-    }
-
-    schema['properties']['notification_format'] = {'type': 'string',
-                                                   'enum': list(valid_notification_formats.keys())
-                                                   }
-
-    # Stuff that shouldn't be available but is just state-storage
-    for v in ['previous_md5', 'last_error', 'has_ldjson_price_data', 'previous_md5_before_filters', 'uuid']:
-        del schema['properties'][v]
-
-    schema['properties']['webdriver_delay']['anyOf'].append({'type': 'integer'})
-
-    schema['properties']['time_between_check'] = build_time_between_check_json_schema()
-
-    schema['properties']['time_between_check_use_default'] = {
-        "type": "boolean",
-        "default": True,
-        "description": "Whether to use global settings for time between checks - defaults to true if not set"
-    }
-
-    schema['properties']['browser_steps'] = {
-        "anyOf": [
-            {
-                "type": "array",
-                "items": {
-                    "type": "object",
-                    "properties": {
-                        "operation": {
-                            "type": ["string", "null"],
-                            "maxLength": 5000  # Allows null and any string up to 5000 chars (including "")
-                        },
-                        "selector": {
-                            "type": ["string", "null"],
-                            "maxLength": 5000
-                        },
-                        "optional_value": {
-                            "type": ["string", "null"],
-                            "maxLength": 5000
-                        }
-                    },
-                    "required": ["operation", "selector", "optional_value"],
-                    "additionalProperties": False  # No extra keys allowed
-                }
-            },
-            {"type": "null"},  # Allows null for `browser_steps`
-            {"type": "array", "maxItems": 0}  # Allows empty array []
-        ]
-    }
-
-    # headers ?
-    return schema
-
@@ -20,11 +20,9 @@ See: Watch.py model docstring for full Pydantic architecture explanation
 See: processors/restock_diff/processor.py:184-192 for current manual implementation
 """

-import os
 from changedetectionio.model import watch_base
 from changedetectionio.model.persistence import EntityPersistenceMixin

-
 class model(EntityPersistenceMixin, watch_base):
    """
    Tag domain model - groups watches and can override their settings.
@@ -2,7 +2,7 @@ import os
 import uuid

 from changedetectionio import strtobool
-from .persistence import EntityPersistenceMixin
+from .persistence import EntityPersistenceMixin, _determine_entity_type

 __all__ = ['EntityPersistenceMixin', 'watch_base']

@@ -26,6 +26,7 @@ class watch_base(dict):
          - Configuration override chain resolution (Watch → Tag → Global)
          - Immutability options
          - Better testing
+          - USE https://docs.pydantic.dev/latest/integrations/datamodel_code_generator TO BUILD THE MODEL FROM THE API-SPEC!!!

    CHAIN RESOLUTION ARCHITECTURE:
        The dream is a 3-level override hierarchy:
@@ -173,7 +174,7 @@ class watch_base(dict):
            'body': None,
            'browser_steps': [],
            'browser_steps_last_error_step': None,
-            'conditions' : {},
+            'conditions' : [],
            'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
            'check_count': 0,
            'check_unique_lines': False,  # On change-detected, compare against all history if its something new
@@ -299,6 +300,42 @@ class watch_base(dict):
        if self.get('default'):
            del self['default']

+    @classmethod
+    def get_property_names(cls):
+        """
+        Get all @property attribute names from this model class using introspection.
+
+        This discovers computed/derived properties that are not stored in the datastore.
+        These properties should be filtered out during PUT/POST requests.
+
+        The result is computed once per class and cached on that class. The cache is
+        looked up in the class's own __dict__ only, so a subclass never reuses the
+        (smaller) set that was cached earlier on one of its parent classes.
+
+        Returns:
+            frozenset: Immutable set of public (non-underscore) @property attribute
+                       names visible on the class, including inherited ones
+        """
+        # hasattr()/getattr() would follow the MRO and find a parent's cache; that made
+        # the result depend on whether the parent or the subclass was queried first.
+        cached = cls.__dict__.get('_cached_property_names')
+        if cached is None:
+            found = set()
+            # Use introspection to find all @property attributes
+            for name in dir(cls):
+                # Skip private/magic attributes
+                if name.startswith('_'):
+                    continue
+                try:
+                    attr = getattr(cls, name)
+                except (AttributeError, TypeError):
+                    # Some descriptors cannot be read from the class itself; ignore them
+                    continue
+                # Accessed on the class, a property yields the descriptor object itself
+                if isinstance(attr, property):
+                    found.add(name)
+            cached = frozenset(found)
+            cls._cached_property_names = cached
+
+        return cached
+
+
    def __deepcopy__(self, memo):
        """
        Custom deepcopy for all watch_base subclasses (Watch, Tag, etc.).
@@ -511,10 +548,8 @@ class watch_base(dict):
        # Save to disk via subclass implementation
        try:
            # Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
-            from changedetectionio.model.persistence import _determine_entity_type
            entity_type = _determine_entity_type(self.__class__)
            filename = f"{entity_type}.json"
-
            self._save_to_disk(data_dict, uuid)
            logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
        except Exception as e:
@@ -56,6 +56,259 @@ def _deduplicate_prices(data):
    return list(unique_data)


+# =============================================================================
+# MEMORY MANAGEMENT: Why We Use Multiprocessing (Linux Only)
+# =============================================================================
+#
+# The get_itemprop_availability() function uses 'extruct' to parse HTML metadata
+# (JSON-LD, microdata, OpenGraph, etc). Extruct internally uses lxml, which wraps
+# libxml2 - a C library that allocates memory at the C level.
+#
+# Memory Leak Problem:
+# --------------------
+# 1. lxml's document_fromstring() creates thousands of Python objects backed by
+#    C-level allocations (nodes, attributes, text content)
+# 2. Python's garbage collector can mark these objects as collectible, but
+#    cannot force the OS to reclaim the actual C-level memory
+# 3. malloc/free typically doesn't return memory to OS - it just marks it as
+#    "free in the process address space"
+# 4. With repeated parsing of large HTML (5MB+ pages), memory accumulates even
+#    after Python GC runs
+#
+# Why Multiprocessing Fixes This:
+# --------------------------------
+# When a subprocess exits, the OS forcibly reclaims ALL memory including C-level
+# allocations that Python GC couldn't release. This ensures clean memory state
+# after each extraction.
+#
+# Performance Impact:
+# -------------------
+# - Memray analysis showed 1.2M document_fromstring allocations per page
+# - Without subprocess: memory grows by ~50-500MB per parse and lingers
+# - With subprocess: ~35MB overhead but forces full cleanup after each run
+# - Trade-off: 35MB resource_tracker vs 500MB+ accumulated leak = much better at scale
+#
+# References:
+# -----------
+# - lxml memory issues: https://medium.com/devopss-hole/python-lxml-memory-leak-b8d0b1000dc7
+# - libxml2 caching behavior: https://www.mail-archive.com/lxml@python.org/msg00026.html
+# - GC limitations with C extensions: https://benbernardblog.com/tracking-down-a-freaky-python-memory-leak-part-2/
+#
+# Additional Context:
+# -------------------
+# - jsonpath_ng (used to query the parsed data) is pure Python and doesn't leak
+# - The leak is specifically from lxml's document parsing, not the JSONPath queries
+# - Linux-only because multiprocessing spawn is well-tested there; other platforms
+#   use direct call as fallback
+#
+# Alternative Solution (Future Optimization):
+# -------------------------------------------
+# This entire problem could be avoided by using regex to extract just the machine
+# data blocks (JSON-LD, microdata, OpenGraph tags) BEFORE parsing with lxml:
+#
+#   1. Use regex to extract <script type="application/ld+json">...</script> blocks
+#   2. Use regex to extract <meta property="og:*"> tags
+#   3. Use regex to find itemprop/itemtype attributes and their containing elements
+#   4. Parse ONLY those extracted snippets instead of the entire HTML document
+#
+# Benefits:
+#   - Avoids parsing 5MB of HTML when we only need a few KB of metadata
+#   - Eliminates the lxml memory leak entirely
+#   - Faster extraction (regex is much faster than DOM parsing)
+#   - No subprocess overhead needed
+#
+# Trade-offs:
+#   - Regex for HTML is brittle (comments, CDATA, edge cases)
+#   - Microdata extraction would be complex (need to track element boundaries)
+#   - Would need extensive testing to ensure we don't miss valid data
+#   - extruct is battle-tested; regex solution would need similar maturity
+#
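+# For now, the subprocess approach is safer and leverages existing extruct code.
+# Note: extract_itemprop_availability_safe() below first tries an html.parser-based
+# extractor (pure_python_extractor) and only falls back to extruct when that does
+# not yield both a price and an availability.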
+# =============================================================================
+
+
+def _extract_itemprop_availability_worker(pipe_conn):
+    """
+    Subprocess entry point for itemprop extraction (Linux memory management).
+
+    Reads one message of UTF-8 encoded HTML from the pipe, runs
+    get_itemprop_availability() on it and writes back exactly one UTF-8 encoded
+    JSON reply:
+
+      - {'success': True, 'data': {...}} on success
+      - {'success': False, 'exception_type': 'MoreThanOnePriceFound'}
+      - {'success': False, 'exception_type': <name>, 'exception_message': <text>}
+        for any other exception
+
+    The pipe is always closed before returning.
+
+    Args:
+        pipe_conn: Pipe connection to receive HTML and send result
+    """
+    import json
+    import gc
+
+    def _reply(payload):
+        # Replies go back as JSON bytes rather than pickled objects
+        pipe_conn.send_bytes(json.dumps(payload).encode('utf-8'))
+
+    html_content = None
+    result_data = None
+
+    try:
+        # Receive HTML as raw bytes (no pickle)
+        raw = pipe_conn.recv_bytes()
+        html_content = raw.decode('utf-8')
+
+        # Drop the undecoded copy straight away
+        del raw
+        gc.collect()
+
+        # Perform extraction in subprocess (uses extruct/lxml)
+        result_data = get_itemprop_availability(html_content)
+
+        # Restock is sent as a plain dict so it can be JSON serialized
+        _reply({
+            'success': True,
+            'data': dict(result_data) if result_data else {}
+        })
+
+        # Release the big objects before exit
+        result_data = None
+        html_content = None
+        gc.collect()
+
+    except MoreThanOnePriceFound:
+        # Reported by name only so the parent can re-raise the same type
+        _reply({
+            'success': False,
+            'exception_type': 'MoreThanOnePriceFound'
+        })
+
+    except Exception as e:
+        # Any other failure is reported as type name + message
+        _reply({
+            'success': False,
+            'exception_type': type(e).__name__,
+            'exception_message': str(e)
+        })
+
+    finally:
+        # Final cleanup before subprocess exits (safe even if already released above)
+        html_content = None
+        result_data = None
+        gc.collect()
+        pipe_conn.close()
+
+
+
+
+def _run_itemprop_worker_subprocess(html_content):
+    """
+    Run _extract_itemprop_availability_worker() in a spawned subprocess.
+
+    Args:
+        html_content: HTML string; sent to the worker as UTF-8 bytes over a pipe
+
+    Returns:
+        dict: The worker's JSON reply ('success' plus either 'data' or
+              'exception_type' / 'exception_message')
+
+    Raises:
+        Exception: Anything raised by multiprocessing, the pipe (e.g. EOFError when
+                   the worker exits without replying) or JSON decoding
+    """
+    import multiprocessing
+    import json
+
+    ctx = multiprocessing.get_context('spawn')
+    parent_conn, child_conn = ctx.Pipe()
+    p = ctx.Process(target=_extract_itemprop_availability_worker, args=(child_conn,))
+    started = False
+    try:
+        p.start()
+        started = True
+
+        # The worker holds its own handle now. Closing ours means that if the worker
+        # dies before replying, recv_bytes() raises EOFError instead of blocking forever.
+        child_conn.close()
+
+        # Send HTML as raw bytes (no pickle)
+        parent_conn.send_bytes(html_content.encode('utf-8'))
+
+        # Receive result as JSON
+        return json.loads(parent_conn.recv_bytes().decode('utf-8'))
+    finally:
+        # Always release the pipe ends and reap the subprocess, also on errors.
+        # close() on an already closed connection is a no-op.
+        parent_conn.close()
+        child_conn.close()
+        if started:
+            p.join()
+
+
+def extract_itemprop_availability_safe(html_content) -> Restock:
+    """
+    Extract itemprop availability with hybrid approach for memory efficiency.
+
+    Strategy (fastest to slowest, least to most memory):
+    1. Try pure Python extraction (JSON-LD, OpenGraph, microdata); used as the
+       result only when it yields BOTH a price and an availability
+    2. Fall back to extruct - in an isolated subprocess on Linux, as a direct
+       call on every other platform
+
+    If the subprocess machinery itself fails, or the worker reports an error other
+    than MoreThanOnePriceFound, get_itemprop_availability() is called directly so
+    callers see exactly what a direct call would have returned or raised.
+
+    Args:
+        html_content: HTML string to parse
+
+    Returns:
+        Restock: Extracted availability data
+
+    Raises:
+        MoreThanOnePriceFound: When multiple prices detected
+        Other exceptions: From extruct/parsing
+    """
+    import platform
+    import gc
+
+    # Step 1: Try pure Python extraction first (fast, no lxml, no memory leak)
+    try:
+        from .pure_python_extractor import extract_metadata_pure_python, query_price_availability
+
+        logger.trace("Attempting pure Python metadata extraction (no lxml)")
+        extracted_data = extract_metadata_pure_python(html_content)
+        price_data = query_price_availability(extracted_data)
+
+        # If we got price AND availability, we're done!
+        if price_data.get('price') and price_data.get('availability'):
+            result = Restock(price_data)
+            logger.debug(f"Pure Python extraction successful: {dict(result)}")
+            return result
+
+        # If we got some data but not everything, still try extruct for completeness
+        if price_data.get('price') or price_data.get('availability'):
+            logger.debug(f"Pure Python extraction partial: {price_data}, will try extruct for completeness")
+
+    except Exception as e:
+        logger.debug(f"Pure Python extraction failed: {e}, falling back to extruct")
+
+    # Step 2: Fall back to extruct (uses lxml, needs subprocess on Linux)
+    logger.trace("Falling back to extruct (lxml-based) with subprocess isolation")
+
+    # Only use subprocess isolation on Linux
+    # Other platforms may have issues with spawn or don't need the aggressive memory management
+    if platform.system() != 'Linux':
+        # Non-Linux: direct call (no subprocess overhead needed)
+        return get_itemprop_availability(html_content)
+
+    try:
+        reply = _run_itemprop_worker_subprocess(html_content)
+    except Exception as e:
+        # If multiprocessing itself fails, log and fall back to direct call
+        logger.warning(f"Subprocess extraction failed: {e}, falling back to direct call")
+        gc.collect()
+        return get_itemprop_availability(html_content)
+
+    # Release the encoded HTML copy and pipe buffers created for the round-trip
+    gc.collect()
+
+    if reply.get('success'):
+        # Reconstruct Restock object from dict
+        return Restock(reply['data'])
+
+    exception_type = reply.get('exception_type')
+    exception_msg = reply.get('exception_message', '')
+
+    if exception_type == 'MoreThanOnePriceFound':
+        # Raised outside any try/except on purpose: the worker has already parsed the
+        # page, so this must reach the caller without a second in-process lxml parse.
+        raise MoreThanOnePriceFound()
+
+    # Any other worker-side error: a direct call reproduces it with its real exception
+    # type (the worker can only report the type's name as a string).
+    logger.warning(f"Subprocess extraction failed: {exception_type}: {exception_msg}, falling back to direct call")
+    return get_itemprop_availability(html_content)
+
+
+
+
 # should return Restock()
 # add casting?
 def get_itemprop_availability(html_content) -> Restock:
@@ -196,8 +449,9 @@ class perform_site_check(difference_detection_processor):
        multiple_prices_found = False

        # Try built-in extraction first, this will scan metadata in the HTML
+        # On Linux, this runs in a subprocess to prevent lxml/extruct memory leaks
        try:
-            itemprop_availability = get_itemprop_availability(self.fetcher.content)
+            itemprop_availability = extract_itemprop_availability_safe(self.fetcher.content)
        except MoreThanOnePriceFound as e:
            # Don't raise immediately - let plugins try to handle this case
            # Plugins might be able to determine which price is correct
@@ -0,0 +1,286 @@
+"""
+Pure Python metadata extractor - no lxml, no memory leaks.
+
+This module provides a fast, memory-efficient alternative to extruct for common
+e-commerce metadata extraction. It handles:
+- JSON-LD (covers 80%+ of modern sites)
+- OpenGraph meta tags
+- Basic microdata attributes
+
+Uses Python's built-in html.parser instead of lxml/libxml2, avoiding C-level
+memory allocation issues. For edge cases, the main processor can fall back to
+extruct (with subprocess isolation on Linux).
+"""
+
+from html.parser import HTMLParser
+import json
+import re
+from loguru import logger
+
+
+class JSONLDExtractor(HTMLParser):
+    """
+    Collect JSON-LD structured data from HTML.
+
+    Every <script type="application/ld+json"> element is decoded with json.loads().
+    A top-level JSON array contributes each of its items, any other value is
+    added as a single entry; blocks that are empty or not valid JSON are skipped.
+    Results accumulate in self.data in document order.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.in_jsonld = False
+        self.data = []  # List of all parsed JSON-LD objects
+        self.current_script = []
+
+    def handle_starttag(self, tag, attrs):
+        if tag != 'script':
+            return
+        # Only an exact type="application/ld+json" marks a JSON-LD script tag
+        if any(name == 'type' and value == 'application/ld+json' for name, value in attrs):
+            self.in_jsonld = True
+            self.current_script = []
+
+    def handle_data(self, data):
+        # The parser may deliver one script body in several chunks
+        if self.in_jsonld:
+            self.current_script.append(data)
+
+    def handle_endtag(self, tag):
+        if tag != 'script' or not self.in_jsonld:
+            return
+
+        raw = ''.join(self.current_script)
+        if raw.strip():
+            try:
+                parsed = json.loads(raw)
+            except json.JSONDecodeError as e:
+                logger.debug(f"Failed to parse JSON-LD: {e}")
+            else:
+                # Arrays are flattened into the result list, everything else is one entry
+                if isinstance(parsed, list):
+                    self.data.extend(parsed)
+                else:
+                    self.data.append(parsed)
+
+        self.in_jsonld = False
+        self.current_script = []
+
+
+
+class OpenGraphExtractor(HTMLParser):
+    """
+    Extract OpenGraph meta tags from HTML.
+
+    Finds <meta property="og:*"> tags commonly used for social media sharing and
+    stores their non-empty content in self.og_data, keyed by the property name.
+    A later tag with the same property overwrites an earlier one.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.og_data = {}
+
+    def handle_starttag(self, tag, attrs):
+        if tag != 'meta':
+            return
+
+        attrs_dict = dict(attrs)
+
+        # HTMLParser reports a valueless attribute (<meta property>) with the value
+        # None, so .get(key, '') would still return None and .startswith() would raise,
+        # aborting OpenGraph extraction for the whole page. Coerce None to ''.
+        prop = attrs_dict.get('property') or ''
+        if not prop.startswith('og:'):
+            return
+
+        content = attrs_dict.get('content') or ''
+        if content:
+            self.og_data[prop] = content
+
+
+
+class MicrodataExtractor(HTMLParser):
+    """
+    Collect a flat set of microdata values from HTML.
+
+    Only the itemprop names 'price', 'priceCurrency' (stored as 'currency') and
+    'availability' are recognised. A value is taken from the element's attribute
+    when present, otherwise from the text that follows the start tag. Nested
+    itemscope/itemtype hierarchies are not tracked - for complex cases, use
+    extruct as fallback.
+    """
+
+    # itemprop name -> (key in self.microdata, attributes to try in priority order)
+    _ATTRIBUTE_SOURCES = {
+        'price': ('price', ('content',)),
+        'priceCurrency': ('currency', ('content',)),
+        # Can be in href (link) or content (meta)
+        'availability': ('availability', ('href', 'content')),
+    }
+
+    def __init__(self):
+        super().__init__()
+        self.microdata = {}
+        self.current_itemprop = None
+
+    def handle_starttag(self, tag, attrs):
+        attrs_dict = dict(attrs)
+        if 'itemprop' not in attrs_dict:
+            return
+
+        itemprop = attrs_dict['itemprop']
+        source = self._ATTRIBUTE_SOURCES.get(itemprop)
+        if source is None:
+            return
+
+        key, attribute_names = source
+        for attribute_name in attribute_names:
+            if attribute_name in attrs_dict:
+                # Attribute values are stored exactly as found
+                self.microdata[key] = attrs_dict[attribute_name]
+                return
+
+        # No usable attribute: take the value from the element's text instead
+        self.current_itemprop = itemprop
+
+    def handle_data(self, data):
+        pending = self.current_itemprop
+        text = data.strip()
+
+        if pending == 'price':
+            # Keep only digits and dots, then try to read that as a number
+            digits = re.sub(r'[^\d.]', '', text)
+            if digits:
+                try:
+                    self.microdata['price'] = float(digits)
+                except ValueError:
+                    pass
+        elif pending == 'priceCurrency':
+            if text:
+                self.microdata['currency'] = text
+        elif pending == 'availability':
+            if text:
+                self.microdata['availability'] = text
+
+    def handle_endtag(self, tag):
+        # Any closing tag ends text capture for the current itemprop
+        self.current_itemprop = None
+
+
+
+def extract_metadata_pure_python(html_content):
+    """
+    Run the three pure Python extractors over one HTML document.
+
+    Each extractor parses the document independently; if one of them raises, its
+    slot keeps the empty default and the remaining extractors still run.
+
+    Args:
+        html_content: HTML string to parse
+
+    Returns:
+        dict: {'json-ld': list of parsed JSON-LD objects,
+               'opengraph': dict of OpenGraph properties,
+               'microdata': dict of microdata properties}
+    """
+    def _parse_with(parser_cls, label):
+        # Returns the fed parser, or None when construction/parsing raised
+        try:
+            parser = parser_cls()
+            parser.feed(html_content)
+        except Exception as e:
+            logger.debug(f"{label} extraction failed: {e}")
+            return None
+        return parser
+
+    result = {'json-ld': [], 'opengraph': {}, 'microdata': {}}
+
+    # Extract JSON-LD
+    jsonld = _parse_with(JSONLDExtractor, 'JSON-LD')
+    if jsonld is not None:
+        result['json-ld'] = jsonld.data
+        logger.trace(f"Pure Python: Found {len(jsonld.data)} JSON-LD blocks")
+
+    # Extract OpenGraph
+    opengraph = _parse_with(OpenGraphExtractor, 'OpenGraph')
+    if opengraph is not None:
+        result['opengraph'] = opengraph.og_data
+        if opengraph.og_data:
+            logger.trace(f"Pure Python: Found {len(opengraph.og_data)} OpenGraph tags")
+
+    # Extract Microdata
+    microdata = _parse_with(MicrodataExtractor, 'Microdata')
+    if microdata is not None:
+        result['microdata'] = microdata.microdata
+        if microdata.microdata:
+            logger.trace(f"Pure Python: Found microdata: {result['microdata']}")
+
+    return result
+
+
+
+def query_price_availability(extracted_data):
+    """
+    Query extracted metadata for price and availability information.
+
+    Sources are consulted in order - JSON-LD (queried with jsonpath_ng), then
+    OpenGraph, then microdata - and each later source only fills in keys that are
+    still missing or falsy.
+
+    Args:
+        extracted_data: Dict from extract_metadata_pure_python()
+
+    Returns:
+        dict: Any subset of {'price', 'currency', 'availability'}. A price taken
+              from JSON-LD or OpenGraph is a float; a microdata price is copied
+              unchanged from the microdata dict.
+    """
+    result = {}
+
+    # 1. Try JSON-LD first (most reliable and common)
+    json_ld_blocks = extracted_data.get('json-ld', [])
+    if json_ld_blocks:
+        # Only needed when there is JSON-LD to query
+        from jsonpath_ng import parse
+
+        # Compile the expressions once instead of once per JSON-LD block; they
+        # find price/availability/currency anywhere in the structure.
+        price_parse = parse('$..(price|Price)')
+        availability_parse = parse('$..(availability|Availability)')
+        currency_parse = parse('$..(priceCurrency|currency)')
+
+        for data in json_ld_blocks:
+            try:
+                price_results = [m.value for m in price_parse.find(data)]
+                if price_results and not result.get('price'):
+                    # Handle various price formats; only the first match is considered
+                    price_val = price_results[0]
+                    if isinstance(price_val, (int, float)):
+                        result['price'] = float(price_val)
+                    elif isinstance(price_val, str):
+                        # Extract numeric value from string
+                        try:
+                            result['price'] = float(re.sub(r'[^\d.]', '', price_val))
+                        except ValueError:
+                            pass
+
+                avail_results = [m.value for m in availability_parse.find(data)]
+                if avail_results and not result.get('availability'):
+                    result['availability'] = str(avail_results[0])
+
+                curr_results = [m.value for m in currency_parse.find(data)]
+                if curr_results and not result.get('currency'):
+                    result['currency'] = str(curr_results[0])
+
+                # If we found price, this JSON-LD block is good
+                if result.get('price'):
+                    logger.debug(f"Pure Python: Found price data in JSON-LD: {result}")
+                    break
+
+            except Exception as e:
+                # A block that cannot be queried must not stop the remaining blocks
+                logger.debug(f"Error querying JSON-LD: {e}")
+                continue
+
+    # 2. Try OpenGraph if JSON-LD didn't provide everything
+    og_data = extracted_data.get('opengraph', {})
+    if not result.get('price') and 'og:price:amount' in og_data:
+        try:
+            result['price'] = float(og_data['og:price:amount'])
+        except ValueError:
+            pass
+    if not result.get('currency') and 'og:price:currency' in og_data:
+        result['currency'] = og_data['og:price:currency']
+    if not result.get('availability') and 'og:availability' in og_data:
+        result['availability'] = og_data['og:availability']
+
+    # 3. Use microdata as last resort
+    microdata = extracted_data.get('microdata', {})
+    if not result.get('price') and 'price' in microdata:
+        result['price'] = microdata['price']
+    if not result.get('currency') and 'currency' in microdata:
+        result['currency'] = microdata['currency']
+    if not result.get('availability') and 'availability' in microdata:
+        result['availability'] = microdata['availability']
+
+    return result
@@ -33,9 +33,8 @@ except ImportError:
 from ..processors import get_custom_watch_obj_for_processor

 # Import the base class and helpers
-from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_watch_atomic, save_tag_atomic, save_json_atomic
+from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_json_atomic
 from .updates import DatastoreUpdatesMixin
-from .legacy_loader import has_legacy_datastore

 # Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
 BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
@@ -78,7 +77,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.")
            copyfile(db_path, db_path_version_backup)

-    def _load_settings(self):
+    def _load_settings(self, filename="changedetection.json"):
        """
        Load settings from storage.

@@ -87,7 +86,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        Returns:
            dict: Settings data loaded from storage
        """
-        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
+        changedetection_json = os.path.join(self.datastore_path, filename)

        logger.info(f"Loading settings from {changedetection_json}")

@@ -122,6 +121,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            if 'application' in settings_data['settings']:
                self.__data['settings']['application'].update(settings_data['settings']['application'])

+        # Mainly for the legacy format, which kept the watches inside the single url-watches.json.
+        # It can't hurt to leave it here: newer settings files simply have no 'watching' key.
+        if 'watching' in settings_data:
+            self.__data['watching'].update(settings_data['watching'])
+
    def _rehydrate_tags(self):
        """Rehydrate tag entities from stored data into Tag objects with restock_diff processor."""
        from ..model import Tag
@@ -146,23 +150,28 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        logger.info(f"Rehydrating {watch_count} watches...")
        watching_rehydrated = {}
        for uuid, watch_dict in self.__data.get('watching', {}).items():
-            watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
+            if isinstance(watch_dict, dict):
+                watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
+            else:
+                logger.error(f"Watch UUID {uuid} already rehydrated")
+
        self.__data['watching'] = watching_rehydrated
        logger.success(f"Rehydrated {watch_count} watches into Watch objects")


-    def _load_state(self):
+    def _load_state(self, main_settings_filename="changedetection.json"):
        """
        Load complete datastore state from storage.

        Orchestrates loading of settings, watches, and tags using polymorphic methods.
        """
        # Load settings
-        settings_data = self._load_settings()
+        settings_data = self._load_settings(filename=main_settings_filename)
        self._apply_settings(settings_data)

-        # Load watches (polymorphic - parent class method)
+        # Load watches, scan them from the disk
        self._load_watches()
+        self._rehydrate_watches()

        # Load tags from individual tag.json files
        # These will override any tags in settings (migration path)
@@ -200,112 +209,73 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

        # Check if datastore already exists
        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
+        changedetection_json_old_schema = os.path.join(self.datastore_path, "url-watches.json")

        if os.path.exists(changedetection_json):
-            # Load existing datastore (changedetection.json + watch.json files)
-            logger.info("Loading existing datastore")
-            try:
-                self._load_state()
-            except Exception as e:
-                logger.critical(f"Failed to load datastore: {e}")
-                raise
-
            # Run schema updates if needed
            # Pass current schema version from loaded datastore (defaults to 0 if not set)
+            # Load existing datastore (changedetection.json + watch.json files)
+            logger.info("Loading existing datastore")
+            self._load_state()
+            current_schema = self.data['settings']['application'].get('schema_version', 0)
+            self.run_updates(current_schema_version=current_schema)
+
+        # Legacy datastore detected - trigger migration. This also works when the stored schema version is much older than the migration step.
+        elif os.path.exists(changedetection_json_old_schema):
+
+            logger.critical(f"Legacy datastore detected at {changedetection_json_old_schema}, loading and running updates")
+            self._load_state(main_settings_filename="url-watches.json")
+            # The legacy config (including its watches) is now in __data; run_updates() migrates it from there (update_26 no longer reads url-watches.json itself)
            current_schema = self.__data['settings']['application'].get('schema_version', 0)
            self.run_updates(current_schema_version=current_schema)
+            # Probably tags were also shifted to disk and many other changes, so best to reload here.
+            self._load_state()

        else:
            # No datastore yet - check if this is a fresh install or legacy migration
-            # Generate app_guid FIRST (required for all operations)
-            if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
-                self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
-            else:
-                self.__data['app_guid'] = str(uuid_builder.uuid4())
+            self.init_fresh_install(include_default_watches=include_default_watches,
+                                    version_tag=version_tag)

-            # Generate RSS access token
-            self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
+    def init_fresh_install(self, include_default_watches, version_tag):
+        """
+        Initialise a brand new datastore when none exists on disk.
+
+        Generates the app GUID and the RSS/API access tokens, marks the schema as
+        already being at the latest version, optionally adds the default watches
+        and finally writes changedetection.json.
+
+        Args:
+            include_default_watches: When truthy, add the two example watches.
+            version_tag: Not read by this method.
+        """
+        # Generate app_guid FIRST (required for all operations)
+        if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
+            self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
+        else:
+            self.__data['app_guid'] = str(uuid_builder.uuid4())

-            # Generate API access token
-            self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
+        # Generate RSS access token
+        self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)

-            # Check if legacy datastore exists (url-watches.json)
-            if has_legacy_datastore(self.datastore_path):
-                # Legacy datastore detected - trigger migration
-                logger.critical(f"Legacy datastore detected at {self.datastore_path}/url-watches.json")
-                logger.critical("Migration will be triggered via update_26")
+        # Generate API access token
+        self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
+        logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")

-                # Load the legacy datastore
-                from .legacy_loader import load_legacy_format
-                legacy_path = os.path.join(self.datastore_path, "url-watches.json")
-                legacy_data = load_legacy_format(legacy_path)
+        # Set schema version to latest (no updates needed)
+        # Guard the pop(): an empty list would raise a bare IndexError here, so fall back to 26
+        # (the same fallback the previous inline implementation used)
+        updates_available = self.get_updates_available()
+        latest_update_available = updates_available.pop() if updates_available else 26
+        logger.info(f"Marking fresh install to schema version {latest_update_available}")
+        self.__data['settings']['application']['schema_version'] = latest_update_available

-                if not legacy_data:
-                    raise Exception("Failed to load legacy datastore from url-watches.json")
+        # Add default watches if requested
+        if include_default_watches:
+            self.add_watch(
+                url='https://news.ycombinator.com/',
+                tag='Tech news',
+                extras={'fetch_backend': 'html_requests'}
+            )
+            self.add_watch(
+                url='https://changedetection.io/CHANGELOG.txt',
+                tag='changedetection.io',
+                extras={'fetch_backend': 'html_requests'}
+            )

-                # Merge legacy data with base_config defaults (preserves new fields like 'ui')
-                # self.__data already has App.model() defaults from line 190
-                logger.info("Merging legacy data with base_config defaults...")
-
-                # Apply top-level fields from legacy data
-                if 'app_guid' in legacy_data:
-                    self.__data['app_guid'] = legacy_data['app_guid']
-                if 'build_sha' in legacy_data:
-                    self.__data['build_sha'] = legacy_data['build_sha']
-                if 'version_tag' in legacy_data:
-                    self.__data['version_tag'] = legacy_data['version_tag']
-
-                # Apply watching data (complete replacement as these are user's watches)
-                if 'watching' in legacy_data:
-                    self.__data['watching'] = legacy_data['watching']
-
-                # Merge settings sections (preserves base_config defaults for missing fields)
-                if 'settings' in legacy_data:
-                    if 'headers' in legacy_data['settings']:
-                        self.__data['settings']['headers'].update(legacy_data['settings']['headers'])
-                    if 'requests' in legacy_data['settings']:
-                        self.__data['settings']['requests'].update(legacy_data['settings']['requests'])
-                    if 'application' in legacy_data['settings']:
-                        # CRITICAL: Use .update() to merge, not replace
-                        # This preserves new fields like 'ui' that exist in base_config
-                        self.__data['settings']['application'].update(legacy_data['settings']['application'])
-
-                # CRITICAL: Rehydrate watches from dicts into Watch objects
-                # This ensures watches have their methods available during migration
-                self._rehydrate_watches()
-
-                # update_26 will save watches to individual files and create changedetection.json
-                # Next startup will load from new format normally
-                self.run_updates()
+        # Create changedetection.json immediately
+        # Best-effort: a failure is logged but does not abort start-up
+        try:
+            self._save_settings()
+            logger.info("Created changedetection.json for new datastore")
+        except Exception as e:
+            logger.error(f"Failed to create initial changedetection.json: {e}")


-            else:
-                # Fresh install - create new datastore
-                logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
-
-                # Set schema version to latest (no updates needed)
-                updates_available = self.get_updates_available()
-                self.__data['settings']['application']['schema_version'] = updates_available.pop() if updates_available else 26
-
-                # Add default watches if requested
-                if include_default_watches:
-                    self.add_watch(
-                        url='https://news.ycombinator.com/',
-                        tag='Tech news',
-                        extras={'fetch_backend': 'html_requests'}
-                    )
-                    self.add_watch(
-                        url='https://changedetection.io/CHANGELOG.txt',
-                        tag='changedetection.io',
-                        extras={'fetch_backend': 'html_requests'}
-                    )
-
-                # Create changedetection.json immediately
-                try:
-                    self._save_settings()
-                    logger.info("Created changedetection.json for new datastore")
-                except Exception as e:
-                    logger.error(f"Failed to create initial changedetection.json: {e}")

        # Set version tag
        self.__data['version_tag'] = version_tag
@@ -383,17 +353,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        # Deep copy settings to avoid modifying the original
        settings_copy = copy.deepcopy(self.__data['settings'])

-        # Only exclude tags if we've already migrated them to individual files (schema >= 28)
-        # This ensures update_28 can migrate tags from settings
-        schema_version = self.__data['settings']['application'].get('schema_version', 0)
-        if schema_version >= 28:
-            # Tags are in individual tag.json files, don't save to settings
-            settings_copy['application']['tags'] = {}
-        # else: keep tags in settings for update_28 migration
-
        return {
            'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
-            'app_guid': self.__data['app_guid'],
+            'app_guid': self.__data.get('app_guid'),
            'settings': settings_copy,
            'build_sha': self.__data.get('build_sha'),
            'version_tag': self.__data.get('version_tag')
@@ -422,15 +384,14 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        Implementation of abstract method from FileSavingDataStore.
        Delegates to helper function and stores results in internal data structure.
        """
-        watching = load_all_watches(
-            self.datastore_path,
-            self.rehydrate_entity
-        )

        # Store loaded data
-        self.__data['watching'] = watching
-
-        logger.debug(f"Loaded {len(watching)} watches")
+        # @note this will also work for the old legacy format because self.__data['watching'] should already have them loaded by this point.
+        self.__data['watching'].update(load_all_watches(
+            self.datastore_path,
+            self.rehydrate_entity
+        ))
+        logger.debug(f"Loaded {len(self.__data['watching'])} watches")

    def _load_tags(self):
        """
@@ -207,15 +207,6 @@ def save_watch_atomic(watch_dir, uuid, watch_dict):
    save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)


-def save_tag_atomic(tag_dir, uuid, tag_dict):
-    """
-    Save a tag to disk using atomic write pattern.
-
-    Convenience wrapper around save_entity_atomic for tags.
-    Kept for backwards compatibility.
-    """
-    save_entity_atomic(tag_dir, uuid, tag_dict, "tag.json", "tag", max_size_mb=1)
-

 def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
    """
@@ -1,66 +0,0 @@
-"""
-Legacy format loader for url-watches.json.
-
-Provides functions to detect and load from the legacy monolithic JSON format.
-Used during migration (update_26) to transition to individual watch.json files.
-"""
-
-import os
-import json
-from loguru import logger
-
-# Try to import orjson for faster JSON serialization
-try:
-    import orjson
-    HAS_ORJSON = True
-except ImportError:
-    HAS_ORJSON = False
-
-
-def has_legacy_datastore(datastore_path):
-    """
-    Check if a legacy url-watches.json file exists.
-
-    This is used by update_26 to determine if migration is needed.
-
-    Args:
-        datastore_path: Path to datastore directory
-
-    Returns:
-        bool: True if url-watches.json exists
-    """
-    url_watches_json = os.path.join(datastore_path, "url-watches.json")
-    return os.path.exists(url_watches_json)
-
-
-def load_legacy_format(json_store_path):
-    """
-    Load datastore from legacy url-watches.json format.
-
-    Args:
-        json_store_path: Full path to url-watches.json file
-
-    Returns:
-        dict: Loaded datastore data with 'watching', 'settings', etc.
-        None: If file doesn't exist or loading failed
-    """
-    logger.info(f"Loading from legacy format: {json_store_path}")
-
-    if not os.path.isfile(json_store_path):
-        logger.warning(f"Legacy file not found: {json_store_path}")
-        return None
-
-    try:
-        if HAS_ORJSON:
-            with open(json_store_path, 'rb') as f:
-                data = orjson.loads(f.read())
-        else:
-            with open(json_store_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-
-        logger.info(f"Loaded {len(data.get('watching', {}))} watches from legacy format")
-        return data
-
-    except Exception as e:
-        logger.error(f"Failed to load legacy format: {e}")
-        return None
@@ -16,12 +16,18 @@ import time
 from loguru import logger
 from copy import deepcopy

+
+# Try to import orjson for faster JSON serialization
+try:
+    import orjson
+    HAS_ORJSON = True
+except ImportError:
+    HAS_ORJSON = False
+
 from ..html_tools import TRANSLATE_WHITESPACE_TABLE
 from ..processors.restock_diff import Restock
 from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
 from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
-from .file_saving_datastore import save_watch_atomic
-

 def create_backup_tarball(datastore_path, update_number):
    """
@@ -97,7 +103,7 @@ def create_backup_tarball(datastore_path, update_number):
                    tar.add(tag_json, arcname=f"{entry}/tag.json")
                    tag_count += 1

-            logger.success(f"Backup created: {backup_filename} ({watch_count} watches, {tag_count} tags)")
+            logger.success(f"Backup created: {backup_filename} ({watch_count} watches from disk, {tag_count} tags from disk)")
            return backup_path

    except Exception as e:
@@ -137,6 +143,7 @@ class DatastoreUpdatesMixin:
        return updates_available

    def run_updates(self, current_schema_version=None):
+        import sys
        """
        Run all pending schema updates sequentially.

@@ -160,6 +167,23 @@ class DatastoreUpdatesMixin:
        4. All changes saved via individual .commit() calls
        """
        updates_available = self.get_updates_available()
+        if self.data.get('watching'):
+            test_watch = self.data['watching'].get(next(iter(self.data.get('watching', {}))))
+            from ..model.Watch import model
+
+            if not isinstance(test_watch, model):
+                import sys
+                logger.critical("Cannot run updates! Watch structure must be re-hydrated back to a Watch model object!")
+                sys.exit(1)
+
+        if self.data['settings']['application'].get('tags',{}):
+            test_tag = self.data['settings']['application'].get('tags',{}).get(next(iter(self.data['settings']['application'].get('tags',{}))))
+            from ..model.Tag import model as tag_model
+
+            if not isinstance(test_tag, tag_model):
+                import sys
+                logger.critical("Cannot run updates! Watch tag/group structure must be re-hydrated back to a Tag model object!")
+                sys.exit(1)

        # Determine current schema version
        if current_schema_version is None:
@@ -201,10 +225,9 @@ class DatastoreUpdatesMixin:
                try:
                    update_method = getattr(self, f"update_{update_n}")()
                except Exception as e:
-                    logger.error(f"Error while trying update_{update_n}")
-                    logger.error(e)
-                    # Don't run any more updates
-                    return
+                    logger.critical(f"Error while trying update_{update_n}")
+                    logger.exception(e)
+                    sys.exit(1)
                else:
                    # Bump the version
                    self.data['settings']['application']['schema_version'] = update_n
@@ -555,27 +578,6 @@ class DatastoreUpdatesMixin:
        logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
        logger.critical("=" * 80)

-        # Check if already migrated
-        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
-        if os.path.exists(changedetection_json):
-            logger.info("Migration already completed (changedetection.json exists), skipping")
-            return
-
-        # Check if we need to load legacy data
-        from .legacy_loader import has_legacy_datastore, load_legacy_format
-
-        if not has_legacy_datastore(self.datastore_path):
-            logger.info("No legacy datastore found, nothing to migrate")
-            return
-
-        # Load legacy data from url-watches.json
-        logger.critical("Loading legacy datastore from url-watches.json...")
-        legacy_path = os.path.join(self.datastore_path, "url-watches.json")
-        legacy_data = load_legacy_format(legacy_path)
-
-        if not legacy_data:
-            raise Exception("Failed to load legacy datastore from url-watches.json")
-
        # Populate settings from legacy data
        logger.info("Populating settings from legacy data...")
        watch_count = len(self.data['watching'])
@@ -587,9 +589,7 @@ class DatastoreUpdatesMixin:
        saved_count = 0
        for uuid, watch in self.data['watching'].items():
            try:
-                watch_dict = dict(watch)
-                watch_dir = os.path.join(self.datastore_path, uuid)
-                save_watch_atomic(watch_dir, uuid, watch_dict)
+                watch.commit()
                saved_count += 1

                if saved_count % 100 == 0:
@@ -635,18 +635,19 @@ class DatastoreUpdatesMixin:

        # Phase 4: Verify settings file exists
        logger.critical("Phase 4/4: Verifying changedetection.json exists...")
+        changedetection_json_new_schema=os.path.join(self.datastore_path, "changedetection.json")
+        if not os.path.isfile(changedetection_json_new_schema):
+            import sys
+            logger.critical("Migration failed, changedetection.json not found after update ran!")
+            sys.exit(1)

-        if not os.path.isfile(changedetection_json):
-            raise Exception(
-                "Migration failed: changedetection.json not found after save. "
-                "url-watches.json remains intact, safe to retry."
-            )

        logger.critical("Phase 4 complete: Verified changedetection.json exists")

        # Success! Now reload from new format
        logger.critical("Reloading datastore from new format...")
-        self._load_state() # Includes load_watches
+        # Write the settings to disk. They are saved without ['watching'] because watches are found by scanning the datastore directory.
+        self._save_settings()
        logger.success("Datastore reloaded from new format successfully")
        logger.critical("=" * 80)
        logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
@@ -681,9 +682,11 @@ class DatastoreUpdatesMixin:
        - Enables independent tag versioning/backup
        - Maintains backwards compatibility (tags stay in settings too)
        """
+        # Force save as tag.json (not watch.json) even if object is corrupted
+
        logger.critical("=" * 80)
        logger.critical("Running migration: Individual tag persistence (update_28)")
-        logger.critical("Creating individual tag.json files (tags remain in settings too)")
+        logger.critical("Creating individual tag.json files")
        logger.critical("=" * 80)

        tags = self.data['settings']['application'].get('tags', {})
@@ -700,27 +703,8 @@ class DatastoreUpdatesMixin:

        for uuid, tag_data in tags.items():
            try:
-                # Force save as tag.json (not watch.json) even if object is corrupted
-                from changedetectionio.store.file_saving_datastore import save_entity_atomic
-                import os
-
-                tag_dir = os.path.join(self.datastore_path, uuid)
-                os.makedirs(tag_dir, exist_ok=True)
-
-                # Convert to dict if it's an object
-                tag_dict = dict(tag_data) if hasattr(tag_data, '__iter__') else tag_data
-
-                # Save explicitly as tag.json
-                save_entity_atomic(
-                    tag_dir,
-                    uuid,
-                    tag_dict,
-                    filename='tag.json',
-                    entity_type='tag',
-                    max_size_mb=1
-                )
+                tag_data.commit()
                saved_count += 1
-
                if saved_count % 10 == 0:
                    logger.info(f"  Progress: {saved_count}/{tag_count} tags migrated...")

@@ -737,5 +721,5 @@ class DatastoreUpdatesMixin:
        # On next load, _load_tags() will read from tag.json files and merge with settings
        logger.info("Tags saved to both settings AND individual tag.json files")
        logger.info("Future tag edits will update both locations (dual storage)")
+        logger.critical("=" * 80)

-        logger.critical("=" * 80)
@@ -328,6 +328,68 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
    )
    assert len(res.json) == 0, "Watch list should be empty"

+def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
+    """
+    Round-trip a watch through the API: create it, GET it, then PUT the same JSON back.
+
+    This checks that the default Model fits back into the OpenAPI spec, and that
+    'readOnly' fields (`last_changed`, `date_created`) sent in the PUT body are
+    not written to the stored watch.
+    """
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    set_original_response(datastore_path=datastore_path)
+    test_url = url_for('test_endpoint', _external=True)
+
+    # Create new
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({"url": test_url}),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 201
+    uuid = res.json.get('uuid')
+
+    # Now fetch it and send it back
+    res = client.get(
+        url_for("watch", uuid=uuid),
+        headers={'x-api-key': api_key}
+    )
+    assert res.status_code == 200, "GET of the newly created watch should succeed"
+    watch = res.json
+
+    # Be sure that 'readOnly' values are never updated in the real watch
+    watch['last_changed'] = 454444444444
+    watch['date_created'] = 454444444444
+
+    # HTTP PUT ( UPDATE an existing watch )
+    res = client.put(
+        url_for("watch", uuid=uuid),
+        headers={'x-api-key': api_key, 'content-type': 'application/json'},
+        data=json.dumps(watch),
+    )
+    if res.status_code != 200:
+        print(f"\n=== PUT failed with {res.status_code} ===")
+        print(f"Error: {res.data}")
+    assert res.status_code == 200, "HTTP PUT update was sent OK"
+
+    # Re-fetch the watch; check the status first so a failed GET is reported as such
+    res = client.get(
+        url_for("watch", uuid=uuid),
+        headers={'x-api-key': api_key}
+    )
+    assert res.status_code == 200, "GET after the PUT should succeed"
+    last_changed = res.json.get('last_changed')
+    assert last_changed != 454444444444
+    assert last_changed != "454444444444"
+
+    date_created = res.json.get('date_created')
+    assert date_created != 454444444444
+    assert date_created != "454444444444"
+
+
 def test_access_denied(client, live_server, measure_memory_usage, datastore_path):
    # `config_api_token_enabled` Should be On by default
    res = client.get(
@@ -401,6 +463,9 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto
        follow_redirects=True
    )

+    if res.status_code != 201:
+        print(f"\n=== POST createwatch failed with {res.status_code} ===")
+        print(f"Response: {res.data}")
    assert res.status_code == 201

    wait_for_all_checks(client)
@@ -464,11 +529,12 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto
    )

    assert res.status_code == 400, "Should get error 400 when we give a field that doesnt exist"
-    # Message will come from `flask_expects_json`
-    # With patternProperties for processor_config_*, the error message format changed slightly
-    assert (b'Additional properties are not allowed' in res.data or
+    # Backend validation now rejects unknown fields with a clear error message
+    assert (b'Unknown field' in res.data or
+            b'Additional properties are not allowed' in res.data or
+            b'Unevaluated properties are not allowed' in res.data or
            b'does not match any of the regexes' in res.data), \
-            "Should reject unknown fields with schema validation error"
+            "Should reject unknown fields with validation error"


    # Try a XSS URL
@@ -489,6 +555,7 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):

    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

+    # Test 1: Basic import with tag
    res = client.post(
        url_for("import") + "?tag=import-test",
        data='https://website1.com\r\nhttps://website2.com',
@@ -507,6 +574,239 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
    res = client.get(url_for('tags.tags_overview_page'))
    assert b'import-test' in res.data

+    # Test 2: Import with watch configuration fields (issue #3845)
+    # Test string field (include_filters), boolean (paused), and processor
+    import urllib.parse
+    params = urllib.parse.urlencode({
+        'tag': 'config-test',
+        'include_filters': 'div.content',
+        'paused': 'true',
+        'processor': 'text_json_diff',
+        'title': 'Imported with Config'
+    })
+
+    res = client.post(
+        url_for("import") + "?" + params,
+        data='https://website3.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 200
+    assert len(res.json) == 1
+    uuid = res.json[0]
+
+    # Verify the configuration was applied
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    assert watch['include_filters'] == ['div.content'], "include_filters should be set as array"
+    assert watch['paused'] == True, "paused should be True"
+    assert watch['processor'] == 'text_json_diff', "processor should be set"
+    assert watch['title'] == 'Imported with Config', "title should be set"
+
+    # Test 3: Import with array field (notification_urls) - using valid Apprise format
+    params = urllib.parse.urlencode({
+        'tag': 'notification-test',
+        'notification_urls': 'mailto://test@example.com,mailto://admin@example.com'
+    })
+
+    res = client.post(
+        url_for("import") + "?" + params,
+        data='https://website4.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 200
+    uuid = res.json[0]
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    assert isinstance(watch['notification_urls'], list), "notification_urls must be stored as a list"
+    assert len(watch['notification_urls']) == 2, "notification_urls should have 2 entries"
+    assert 'mailto://test@example.com' in watch['notification_urls'], "notification_urls should contain first email"
+    assert 'mailto://admin@example.com' in watch['notification_urls'], "notification_urls should contain second email"
+
+    # Test 4: Import with object field (time_between_check)
+    import json
+    time_config = json.dumps({"hours": 2, "minutes": 30})
+    params = urllib.parse.urlencode({
+        'tag': 'schedule-test',
+        'time_between_check': time_config
+    })
+
+    res = client.post(
+        url_for("import") + "?" + params,
+        data='https://website5.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 200
+    uuid = res.json[0]
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    assert watch['time_between_check']['hours'] == 2, "time_between_check hours should be 2"
+    assert watch['time_between_check']['minutes'] == 30, "time_between_check minutes should be 30"
+
+    # Test 5: Import with invalid processor (should fail)
+    res = client.post(
+        url_for("import") + "?processor=invalid_processor",
+        data='https://website6.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 400, "Should reject invalid processor"
+    assert b"Invalid processor" in res.data, "Error message should mention invalid processor"
+
+    # Test 6: Import with invalid field (should fail)
+    res = client.post(
+        url_for("import") + "?unknown_field=value",
+        data='https://website7.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 400, "Should reject unknown field"
+    assert b"Unknown watch configuration parameter" in res.data, "Error message should mention unknown parameter"
+
+    # Test 7: Import with complex nested array (browser_steps) - array of objects
+    browser_steps = json.dumps([
+        {"operation": "wait", "selector": "5", "optional_value": ""},
+        {"operation": "click", "selector": "button.submit", "optional_value": ""}
+    ])
+    params = urllib.parse.urlencode({
+        'tag': 'browser-test',
+        'browser_steps': browser_steps
+    })
+
+    res = client.post(
+        url_for("import") + "?" + params,
+        data='https://website8.com',
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    assert res.status_code == 200, "Should accept browser_steps array"
+    uuid = res.json[0]
+    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
+    assert len(watch['browser_steps']) == 2, "Should have 2 browser steps"
+    assert watch['browser_steps'][0]['operation'] == 'wait', "First step should be wait"
+    assert watch['browser_steps'][1]['operation'] == 'click', "Second step should be click"
+    assert watch['browser_steps'][1]['selector'] == 'button.submit', "Second step selector should be button.submit"
+
+    # Cleanup
+    delete_all_watches(client)
+
+
+def test_api_import_small_synchronous(client, live_server, measure_memory_usage, datastore_path):
+    """Test that small imports (< threshold) are processed synchronously"""
+    from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
+
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    # Use local test endpoint to avoid network delays
+    test_url_base = url_for('test_endpoint', _external=True)
+
+    # Create URLs: threshold - 1 to stay under limit
+    num_urls = min(5, IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD - 1)  # Use small number for faster test
+    urls = '\n'.join([f'{test_url_base}?id=small-{i}' for i in range(num_urls)])
+
+    # Import small batch
+    res = client.post(
+        url_for("import") + "?tag=small-test",
+        data=urls,
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    # Should return 200 OK with UUID list (synchronous)
+    assert res.status_code == 200, f"Should return 200 for small imports, got {res.status_code}"
+    assert isinstance(res.json, list), "Response should be a list of UUIDs"
+    assert len(res.json) == num_urls, f"Should return {num_urls} UUIDs, got {len(res.json)}"
+
+    # Verify all watches were created immediately
+    for uuid in res.json:
+        assert uuid in live_server.app.config['DATASTORE'].data['watching'], \
+            f"Watch {uuid} should exist immediately after synchronous import"
+
+    print(f"\n✓ Successfully created {num_urls} watches synchronously")
+
+
+def test_api_import_large_background(client, live_server, measure_memory_usage, datastore_path):
+    """Test that large imports (>= threshold) are processed in background thread"""
+    from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
+    import time
+
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+
+    # Use local test endpoint to avoid network delays
+    test_url_base = url_for('test_endpoint', _external=True)
+
+    # Create URLs: threshold + 10 to trigger background processing
+    num_urls = IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD + 10
+    urls = '\n'.join([f'{test_url_base}?id=bulk-{i}' for i in range(num_urls)])
+
+    # Import large batch
+    res = client.post(
+        url_for("import") + "?tag=bulk-test",
+        data=urls,
+        headers={'x-api-key': api_key},
+        follow_redirects=True
+    )
+
+    # Should return 202 Accepted (background processing)
+    assert res.status_code == 202, f"Should return 202 for large imports, got {res.status_code}"
+    assert b"background" in res.data.lower(), "Response should mention background processing"
+
+    # Extract expected count from response
+    response_json = res.json
+    assert 'count' in response_json, "Response should include count"
+    assert response_json['count'] == num_urls, f"Count should be {num_urls}, got {response_json['count']}"
+
+    # Wait for background thread to complete (with timeout)
+    max_wait = 10  # seconds
+    wait_interval = 0.5
+    elapsed = 0
+    watches_created = 0
+
+    while elapsed < max_wait:
+        time.sleep(wait_interval)
+        elapsed += wait_interval
+
+        # Count how many watches have been created
+        watches_created = len([
+            uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
+            if 'id=bulk-' in watch['url']
+        ])
+
+        if watches_created == num_urls:
+            break
+
+    # Verify all watches were created
+    assert watches_created == num_urls, \
+        f"Expected {num_urls} watches to be created, but found {watches_created} after {elapsed}s"
+
+    # Verify watches have correct configuration
+    bulk_watches = [
+        watch for watch in live_server.app.config['DATASTORE'].data['watching'].values()
+        if 'id=bulk-' in watch['url']
+    ]
+
+    assert len(bulk_watches) == num_urls, "All bulk watches should exist"
+
+    # Check that they have the correct tag
+    datastore = live_server.app.config['DATASTORE']
+    # Get UUIDs of bulk watches by filtering the datastore keys
+    bulk_watch_uuids = [
+        uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
+        if 'id=bulk-' in watch['url']
+    ]
+    for watch_uuid in bulk_watch_uuids:
+        tags = datastore.get_all_tags_for_watch(uuid=watch_uuid)
+        tag_names = [t['title'] for t in tags.values()]
+        assert 'bulk-test' in tag_names, f"Watch {watch_uuid} should have 'bulk-test' tag"
+
+    print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
+
+
 def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):


@@ -633,7 +933,9 @@ def test_api_url_validation(client, live_server, measure_memory_usage, datastore
    )
    assert res.status_code == 400, "Updating watch URL to null should fail"
    # Accept either OpenAPI validation error or our custom validation error
-    assert b'URL cannot be null' in res.data or b'OpenAPI validation failed' in res.data or b'validation error' in res.data.lower()
+    assert (b'URL cannot be null' in res.data or
+            b'Validation failed' in res.data or
+            b'validation error' in res.data.lower())

    # Test 8: UPDATE to empty string URL should fail
    res = client.put(
@@ -720,3 +1022,140 @@ def test_api_url_validation(client, live_server, measure_memory_usage, datastore
        headers={'x-api-key': api_key},
    )
    delete_all_watches(client)
+
+
+def test_api_time_between_check_validation(client, live_server, measure_memory_usage, datastore_path):
+    """
+    Test that time_between_check validation works correctly:
+    - When time_between_check_use_default is false, at least one time value must be > 0
+    - Values must be valid integers
+    """
+    import json
+    from flask import url_for
+    
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+    
+    # Test 1: time_between_check_use_default=false with NO time_between_check should fail
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": "https://example.com",
+            "time_between_check_use_default": False
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+    )
+    assert res.status_code == 400, "Should fail when time_between_check_use_default=false with no time_between_check"
+    assert b"At least one time interval" in res.data, "Error message should mention time interval requirement"
+    
+    # Test 2: time_between_check_use_default=false with ALL zeros should fail
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": "https://example.com",
+            "time_between_check_use_default": False,
+            "time_between_check": {
+                "weeks": 0,
+                "days": 0,
+                "hours": 0,
+                "minutes": 0,
+                "seconds": 0
+            }
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+    )
+    assert res.status_code == 400, "Should fail when all time values are 0"
+    assert b"At least one time interval" in res.data, "Error message should mention time interval requirement"
+    
+    # Test 3: time_between_check_use_default=false with NULL values should fail
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": "https://example.com",
+            "time_between_check_use_default": False,
+            "time_between_check": {
+                "weeks": None,
+                "days": None,
+                "hours": None,
+                "minutes": None,
+                "seconds": None
+            }
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+    )
+    assert res.status_code == 400, "Should fail when all time values are null"
+    assert b"At least one time interval" in res.data, "Error message should mention time interval requirement"
+    
+    # Test 4: time_between_check_use_default=false with valid hours should succeed
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": "https://example.com",
+            "time_between_check_use_default": False,
+            "time_between_check": {
+                "hours": 2
+            }
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+    )
+    assert res.status_code == 201, "Should succeed with valid hours value"
+    uuid1 = res.json.get('uuid')
+    
+    # Test 5: time_between_check_use_default=false with valid minutes should succeed
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": "https://example2.com",
+            "time_between_check_use_default": False,
+            "time_between_check": {
+                "minutes": 30
+            }
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+    )
+    assert res.status_code == 201, "Should succeed with valid minutes value"
+    uuid2 = res.json.get('uuid')
+    
+    # Test 6: time_between_check_use_default=true (or missing) with no time_between_check should succeed (uses defaults)
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": "https://example3.com",
+            "time_between_check_use_default": True
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+    )
+    assert res.status_code == 201, "Should succeed when using default settings"
+    uuid3 = res.json.get('uuid')
+    
+    # Test 7: Default behavior (no time_between_check_use_default field) should use defaults and succeed
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": "https://example4.com"
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+    )
+    assert res.status_code == 201, "Should succeed with default behavior (using global settings)"
+    uuid4 = res.json.get('uuid')
+    
+    # Test 8: Verify integer type validation - string should fail (OpenAPI validation)
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": "https://example5.com",
+            "time_between_check_use_default": False,
+            "time_between_check": {
+                "hours": "not_a_number"
+            }
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+    )
+    assert res.status_code == 400, "Should fail when time value is not an integer"
+    assert b"Validation failed" in res.data or b"not of type" in res.data, "Should mention validation/type error"
+    
+    # Cleanup
+    for uuid in [uuid1, uuid2, uuid3, uuid4]:
+        client.delete(
+            url_for("watch", uuid=uuid),
+            headers={'x-api-key': api_key},
+        )
@@ -107,7 +107,7 @@ def test_watch_notification_urls_validation(client, live_server, measure_memory_
        headers={'content-type': 'application/json', 'x-api-key': api_key}
    )
    assert res.status_code == 400, "Should reject non-list notification_urls"
-    assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
+    assert b"Validation failed" in res.data or b"is not of type" in res.data

    # Test 6: Verify original URLs are preserved after failed update
    res = client.get(
@@ -159,7 +159,7 @@ def test_tag_notification_urls_validation(client, live_server, measure_memory_us
        headers={'content-type': 'application/json', 'x-api-key': api_key}
    )
    assert res.status_code == 400, "Should reject non-list notification_urls"
-    assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
+    assert b"Validation failed" in res.data or b"is not of type" in res.data

    # Test 4: Verify original URLs are preserved after failed update
    tag = datastore.data['settings']['application']['tags'][tag_uuid]
@@ -26,7 +26,7 @@ def test_openapi_validation_invalid_content_type_on_create_watch(client, live_se

    # Should get 400 error due to OpenAPI validation failure
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
+    assert b"Validation failed" in res.data, "Should contain validation error message"


 def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage, datastore_path):
@@ -43,7 +43,7 @@ def test_openapi_validation_missing_required_field_create_watch(client, live_ser

    # Should get 400 error due to missing required field
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
+    assert b"Validation failed" in res.data, "Should contain validation error message"


 def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage, datastore_path):
@@ -80,10 +80,9 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m
    # Should get 400 error due to invalid field (this will be caught by internal validation)
    # Note: This tests the flow where OpenAPI validation passes but internal validation catches it
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    # With patternProperties for processor_config_*, the error message format changed slightly
-    assert (b"Additional properties are not allowed" in res.data or
-            b"does not match any of the regexes" in res.data), \
-            "Should contain validation error about additional/invalid properties"
+    # Backend validation now returns "Unknown field(s):" message
+    assert b"Unknown field" in res.data, \
+            "Should contain validation error about unknown fields"


 def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
@@ -100,7 +99,7 @@ def test_openapi_validation_import_wrong_content_type(client, live_server, measu

    # Should get 400 error due to content-type mismatch
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
+    assert b"Validation failed" in res.data, "Should contain validation error message"


 def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage, datastore_path):
@@ -158,7 +157,7 @@ def test_openapi_validation_create_tag_missing_required_title(client, live_serve

    # Should get 400 error due to missing required field
    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
+    assert b"Validation failed" in res.data, "Should contain validation error message"


 def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage, datastore_path):
@@ -18,7 +18,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
        url_for("tags"),
        headers={'x-api-key': api_key}
    )
-    assert res.text.strip() == "{}", "Should be empty list"
+    assert res.get_data(as_text=True).strip() == "{}", "Should be empty list"
    assert res.status_code == 200

    res = client.post(
@@ -36,7 +36,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
        headers={'x-api-key': api_key}
    )
    assert res.status_code == 200
-    assert new_tag_uuid in res.text
+    assert new_tag_uuid in res.get_data(as_text=True)
    assert res.json[new_tag_uuid]['title'] == tag_title
    assert res.json[new_tag_uuid]['notification_muted'] == False

@@ -118,6 +118,16 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
    assert res.status_code == 200
    assert new_tag_uuid in res.json.get('tags', [])

+    # Test that tags are returned when listing ALL watches (issue #3854)
+    res = client.get(
+        url_for("createwatch"),  # GET /api/v1/watch - list all watches
+        headers={'x-api-key': api_key}
+    )
+    assert res.status_code == 200
+    assert watch_uuid in res.json, "Watch should be in the list"
+    assert 'tags' in res.json[watch_uuid], "Tags field should be present in watch list"
+    assert new_tag_uuid in res.json[watch_uuid]['tags'], "Tag UUID should be in tags array"
+
    # Check recheck by tag
    before_check_time = live_server.app.config['DATASTORE'].data['watching'][watch_uuid].get('last_checked')
    time.sleep(1)
@@ -148,7 +158,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
        headers={'x-api-key': api_key}
    )
    assert res.status_code == 200
-    assert new_tag_uuid not in res.text
+    assert new_tag_uuid not in res.get_data(as_text=True)

    # Verify tag was removed from watch
    res = client.get(
@@ -166,4 +176,57 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
    assert res.status_code == 204


+def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
+    """
+    Test the full round trip, this way we test the default Model fits back into OpenAPI spec
+    :param client:
+    :param live_server:
+    :param measure_memory_usage:
+    :param datastore_path:
+    :return:
+    """
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

+    set_original_response(datastore_path=datastore_path)
+
+    res = client.post(
+        url_for("tag"),
+        data=json.dumps({"title": "My tag title"}),
+        headers={'content-type': 'application/json', 'x-api-key': api_key}
+    )
+    assert res.status_code == 201
+
+    uuid = res.json.get('uuid')
+
+    # Now fetch it and send it back
+
+    res = client.get(
+        url_for("tag", uuid=uuid),
+        headers={'x-api-key': api_key}
+    )
+
+    tag = res.json
+
+    # Only test with date_created (readOnly field that should be filtered out)
+    # last_changed is Watch-specific and doesn't apply to Tags
+    tag['date_created'] = 454444444444
+
+    # HTTP PUT ( UPDATE an existing watch )
+    res = client.put(
+        url_for("tag", uuid=uuid),
+        headers={'x-api-key': api_key, 'content-type': 'application/json'},
+        data=json.dumps(tag),
+    )
+    if res.status_code != 200:
+        print(f"\n=== PUT failed with {res.status_code} ===")
+        print(f"Error: {res.data}")
+    assert res.status_code == 200, "HTTP PUT update was sent OK"
+
+    # Verify readOnly fields like date_created cannot be overridden
+    res = client.get(
+        url_for("tag", uuid=uuid),
+        headers={'x-api-key': api_key}
+    )
+    date_created = res.json.get('date_created')
+    assert date_created != 454444444444, "ReadOnly date_created should not be updateable"
+    assert date_created != "454444444444", "ReadOnly date_created should not be updateable"
@@ -5,6 +5,8 @@ from flask import url_for
 from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
 import os

+from ..store import ChangeDetectionStore
+

 # def test_setup(client, live_server, measure_memory_usage, datastore_path):
   #  live_server_setup(live_server) # Setup on conftest per function
@@ -487,7 +489,6 @@ def test_tag_json_persistence(client, live_server, measure_memory_usage, datasto
    - Tag deletion removes tag.json file
    """
    import json
-    from changedetectionio.store import ChangeDetectionStore

    datastore = client.application.config.get('DATASTORE')

@@ -569,9 +570,6 @@ def test_tag_json_migration_update_27(client, live_server, measure_memory_usage,
    This simulates a pre-update_27 datastore and verifies migration works.
    """
    import json
-    from changedetectionio.store import ChangeDetectionStore
-
-    datastore = client.application.config.get('DATASTORE')

    # 1. Create multiple tags
    tag_names = ['migration-tag-1', 'migration-tag-2', 'migration-tag-3']
@@ -28,7 +28,7 @@ info:
    
    For example: `x-api-key: YOUR_API_KEY`
    
-  version: 0.1.4
+  version: 0.1.6
  contact:
    name: ChangeDetection.io
    url: https://github.com/dgtlmoon/changedetection.io
@@ -126,13 +126,22 @@ components:
    WatchBase:
      type: object
      properties:
+        uuid:
+          type: string
+          format: uuid
+          description: Unique identifier
+          readOnly: true
+        date_created:
+          type: [integer, 'null']
+          description: Unix timestamp of creation
+          readOnly: true
        url:
          type: string
          format: uri
          description: URL to monitor for changes
          maxLength: 5000
        title:
-          type: string
+          type: [string, 'null']
          description: Custom title for the web page change monitor (watch), not to be confused with page_title
          maxLength: 5000
        tag:
@@ -156,56 +165,61 @@ components:
          description: HTTP method to use
        fetch_backend:
          type: string
-          enum: [html_requests, html_webdriver]
-          description: Backend to use for fetching content
+          description: |
+            Backend to use for fetching content. Common values:
+            - `system` (default) - Use the system-wide default fetcher
+            - `html_requests` - Fast requests-based fetcher
+            - `html_webdriver` - Browser-based fetcher (Playwright/Puppeteer)
+            - `extra_browser_*` - Custom browser configurations (if configured)
+            - Plugin-provided fetchers (if installed)
+          pattern: '^(system|html_requests|html_webdriver|extra_browser_.+)$'
+          default: system
        headers:
          type: object
          additionalProperties:
            type: string
          description: HTTP headers to include in requests
        body:
-          type: string
+          type: [string, 'null']
          description: HTTP request body
          maxLength: 5000
        proxy:
-          type: string
+          type: [string, 'null']
          description: Proxy configuration
          maxLength: 5000
+        ignore_status_codes:
+          type: [boolean, 'null']
+          description: Ignore HTTP status code errors (boolean or null)
        webdriver_delay:
-          type: integer
+          type: [integer, 'null']
          description: Delay in seconds for webdriver
        webdriver_js_execute_code:
-          type: string
+          type: [string, 'null']
          description: JavaScript code to execute
          maxLength: 5000
        time_between_check:
          type: object
          properties:
            weeks:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 52000
-              nullable: true
            days:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 365000
-              nullable: true
            hours:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 8760000
-              nullable: true
            minutes:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 525600000
-              nullable: true
            seconds:
-              type: integer
+              type: [integer, 'null']
              minimum: 0
              maximum: 31536000000
-              nullable: true
          description: Time intervals between checks. All fields must be non-negative. At least one non-zero value required when not using default settings.
        time_between_check_use_default:
          type: boolean
@@ -219,11 +233,11 @@ components:
          maxItems: 100
          description: Notification URLs for this web page change monitor (watch). Maximum 100 URLs.
        notification_title:
-          type: string
+          type: [string, 'null']
          description: Custom notification title
          maxLength: 5000
        notification_body:
-          type: string
+          type: [string, 'null']
          description: Custom notification body
          maxLength: 5000
        notification_format:
@@ -231,7 +245,7 @@ components:
          enum: ['text', 'html', 'htmlcolor', 'markdown', 'System default']
          description: Format for notifications
        track_ldjson_price_data:
-          type: boolean
+          type: [boolean, 'null']
          description: Whether to track JSON-LD price data
        browser_steps:
          type: array
@@ -239,17 +253,14 @@ components:
            type: object
            properties:
              operation:
-                type: string
+                type: [string, 'null']
                maxLength: 5000
-                nullable: true
              selector:
-                type: string
+                type: [string, 'null']
                maxLength: 5000
-                nullable: true
              optional_value:
-                type: string
+                type: [string, 'null']
                maxLength: 5000
-                nullable: true
            required: [operation, selector, optional_value]
            additionalProperties: false
          maxItems: 100
@@ -260,16 +271,197 @@ components:
          default: text_json_diff
          description: Optional processor mode to use for change detection. Defaults to `text_json_diff` if not specified.

+        # Content Filtering
+        include_filters:
+          type: array
+          items:
+            type: string
+            maxLength: 5000
+          maxItems: 100
+          description: CSS/XPath selectors to extract specific content from the page
+        subtractive_selectors:
+          type: array
+          items:
+            type: string
+            maxLength: 5000
+          maxItems: 100
+          description: CSS/XPath selectors to remove content from the page
+        ignore_text:
+          type: array
+          items:
+            type: string
+            maxLength: 5000
+          maxItems: 100
+          description: Text patterns to ignore in change detection
+        trigger_text:
+          type: array
+          items:
+            type: string
+            maxLength: 5000
+          maxItems: 100
+          description: Text/regex patterns that must be present to trigger a change
+        text_should_not_be_present:
+          type: array
+          items:
+            type: string
+            maxLength: 5000
+          maxItems: 100
+          description: Text that should NOT be present (triggers alert if found)
+        extract_text:
+          type: array
+          items:
+            type: string
+            maxLength: 5000
+          maxItems: 100
+          description: Regex patterns to extract specific text after filtering
+
+        # Text Processing
+        trim_text_whitespace:
+          type: boolean
+          default: false
+          description: Strip leading/trailing whitespace from text
+        sort_text_alphabetically:
+          type: boolean
+          default: false
+          description: Sort lines alphabetically before comparison
+        remove_duplicate_lines:
+          type: boolean
+          default: false
+          description: Remove duplicate lines from content
+        check_unique_lines:
+          type: boolean
+          default: false
+          description: Compare against all history for unique lines
+        strip_ignored_lines:
+          type: [boolean, 'null']
+          description: Remove lines matching ignore patterns
+
+        # Change Detection Filters
+        filter_text_added:
+          type: boolean
+          default: true
+          description: Include added text in change detection
+        filter_text_removed:
+          type: boolean
+          default: true
+          description: Include removed text in change detection
+        filter_text_replaced:
+          type: boolean
+          default: true
+          description: Include replaced text in change detection
+
+        # Restock/Price Detection
+        in_stock_only:
+          type: boolean
+          default: true
+          description: Only trigger on in-stock transitions (restock_diff processor)
+        follow_price_changes:
+          type: boolean
+          default: true
+          description: Monitor and track price changes (restock_diff processor)
+        price_change_threshold_percent:
+          type: [number, 'null']
+          description: Minimum price change percentage to trigger notification
+        has_ldjson_price_data:
+          type: [boolean, 'null']
+          description: Whether the page has JSON-LD price data (auto-detected)
+          readOnly: true
+
+        # Notifications
+        notification_screenshot:
+          type: boolean
+          default: false
+          description: Include screenshot in notifications (if supported by notification URL)
+        filter_failure_notification_send:
+          type: boolean
+          default: true
+          description: Send notification when filters fail to match content
+
+        # History & Display
+        use_page_title_in_list:
+          type: [boolean, 'null']
+          description: Display page title in watch list (null = use system default)
+        history_snapshot_max_length:
+          type: [integer, 'null']
+          minimum: 1
+          maximum: 1000
+          description: Maximum number of history snapshots to keep (null = use system default)
+
+        # Scheduling
+        time_schedule_limit:
+          type: object
+          description: Weekly schedule limiting when checks can run
+          properties:
+            enabled:
+              type: boolean
+              default: false
+            monday:
+              $ref: '#/components/schemas/DaySchedule'
+            tuesday:
+              $ref: '#/components/schemas/DaySchedule'
+            wednesday:
+              $ref: '#/components/schemas/DaySchedule'
+            thursday:
+              $ref: '#/components/schemas/DaySchedule'
+            friday:
+              $ref: '#/components/schemas/DaySchedule'
+            saturday:
+              $ref: '#/components/schemas/DaySchedule'
+            sunday:
+              $ref: '#/components/schemas/DaySchedule'
+
+        # Conditions (advanced logic)
+        conditions:
+          type: array
+          items:
+            type: object
+            properties:
+              field:
+                type: string
+                description: Field to check (e.g., 'page_filtered_text', 'page_title')
+              operator:
+                type: string
+                description: Comparison operator (e.g., 'contains_regex', 'equals', 'not_equals')
+              value:
+                type: string
+                description: Value to compare against
+            required: [field, operator, value]
+          maxItems: 100
+          description: Array of condition rules for change detection logic (empty array when not set)
+        conditions_match_logic:
+          type: string
+          enum: ['ALL', 'ANY']
+          default: 'ALL'
+          description: Logic operator - ALL (match all conditions) or ANY (match any condition)
+
+    DaySchedule:
+      type: object
+      properties:
+        enabled:
+          type: boolean
+          default: true
+        start_time:
+          type: string
+          pattern: '^([0-1]?[0-9]|2[0-3]):[0-5][0-9]$'
+          default: '00:00'
+          description: Start time in 24-hour HH:MM format (leading zero on the hour is optional)
+        duration:
+          type: object
+          properties:
+            hours:
+              type: string
+              pattern: '^[0-9]+$'
+              default: '24'
+            minutes:
+              type: string
+              pattern: '^[0-9]+$'
+              default: '00'
+
    Watch:
      allOf:
        - $ref: '#/components/schemas/WatchBase'
        - type: object
          properties:
-            uuid:
-              type: string
-              format: uuid
-              description: Unique identifier for the web page change monitor (watch)
-              readOnly: true
            last_checked:
              type: integer
              description: Unix timestamp of last check
@@ -278,9 +470,10 @@ components:
              type: integer
              description: Unix timestamp of last change
              readOnly: true
+              x-computed: true
            last_error:
-              type: string
-              description: Last error message
+              type: [string, boolean, 'null']
+              description: Last error message (false when no error, string when error occurred, null if not checked yet)
              readOnly: true
            last_viewed:
              type: integer
@@ -291,6 +484,61 @@ components:
              format: string
              description: The watch URL rendered in case of any Jinja2 markup, always use this for listing.
              readOnly: true
+              x-computed: true
+            page_title:
+              type: [string, 'null']
+              description: HTML <title> tag extracted from the page
+              readOnly: true
+            check_count:
+              type: integer
+              description: Total number of checks performed
+              readOnly: true
+            fetch_time:
+              type: number
+              description: Duration of last fetch in seconds
+              readOnly: true
+            previous_md5:
+              type: [string, boolean]
+              description: MD5 hash of previous content (false if not set)
+              readOnly: true
+            previous_md5_before_filters:
+              type: [string, boolean]
+              description: MD5 hash before filters applied (false if not set)
+              readOnly: true
+            consecutive_filter_failures:
+              type: integer
+              description: Counter for consecutive filter match failures
+              readOnly: true
+            last_notification_error:
+              type: [string, 'null']
+              description: Last notification error message
+              readOnly: true
+            notification_alert_count:
+              type: integer
+              description: Number of notifications sent
+              readOnly: true
+            content-type:
+              type: [string, 'null']
+              description: Content-Type from last fetch
+              readOnly: true
+            remote_server_reply:
+              type: [string, 'null']
+              description: Server header from last response
+              readOnly: true
+            browser_steps_last_error_step:
+              type: [integer, 'null']
+              description: Last browser step that caused an error
+              readOnly: true
+            viewed:
+              type: [integer, boolean]
+              description: Computed property - true if watch has been viewed, false otherwise (deprecated, use last_viewed instead)
+              readOnly: true
+              x-computed: true
+            history_n:
+              type: integer
+              description: Number of history snapshots available
+              readOnly: true
+              x-computed: true

    CreateWatch:
      allOf:
@@ -301,34 +549,45 @@ components:

    UpdateWatch:
      allOf:
-        - $ref: '#/components/schemas/WatchBase'
+        - $ref: '#/components/schemas/WatchBase'  # Extends WatchBase for user-settable fields
        - type: object
          properties:
            last_viewed:
              type: integer
              description: Unix timestamp in seconds of the last time the watch was viewed. Setting it to a value higher than `last_changed` in the "Update watch" endpoint marks the watch as viewed.
              minimum: 0
+      # Note: readOnly and @property fields are filtered out in the backend before update.
+      # unevaluatedProperties: false is deliberately omitted here so that roundtrip GET/PUT
+      # workflows keep working when the response includes computed fields that should be silently ignored.

    Tag:
-      type: object
-      properties:
-        uuid:
-          type: string
-          format: uuid
-          description: Unique identifier for the tag
-          readOnly: true
-        title:
-          type: string
-          description: Tag title
-          maxLength: 5000
-        notification_urls:
-          type: array
-          items:
-            type: string
-          description: Default notification URLs for web page change monitors (watches) with this tag
-        notification_muted:
-          type: boolean
-          description: Whether notifications are muted for this tag
+      allOf:
+        - $ref: '#/components/schemas/WatchBase'
+        - type: object
+          properties:
+            overrides_watch:
+              type: [boolean, 'null']
+              description: |
+                Whether this tag's settings override watch settings for all watches in this tag/group.
+                - true: Tag settings override watch settings
+                - false: Tag settings do not override (watches use their own settings)
+                - null: Not decided yet / inherit default behavior
+            # Future: Aggregated statistics from all watches with this tag
+            # check_count:
+            #   type: integer
+            #   description: Sum of check_count from all watches with this tag
+            #   readOnly: true
+            #   x-computed: true
+            # last_checked:
+            #   type: integer
+            #   description: Most recent last_checked timestamp from all watches with this tag
+            #   readOnly: true
+            #   x-computed: true
+            # last_changed:
+            #   type: integer
+            #   description: Most recent last_changed timestamp from all watches with this tag
+            #   readOnly: true
+            #   x-computed: true

    CreateTag:
      allOf:
@@ -1503,46 +1762,92 @@ paths:
    post:
      operationId: importWatches
      tags: [Import]
-      summary: Import watch URLs
-      description: Import a list of URLs to monitor. Accepts line-separated URLs in request body.
+      summary: Import watch URLs with configuration
+      description: |
+        Import a list of URLs to monitor with optional watch configuration. Accepts line-separated URLs in request body.
+
+        **Configuration via Query Parameters:**
+
+        You can pass ANY watch configuration field as a query parameter to apply that setting to all imported watches.
+        All parameters from the Watch schema are supported (processor, fetch_backend, notification_urls, etc.).
+
+        **Special Parameters:**
+        - `tag` / `tag_uuids` - Assign tags to imported watches
+        - `proxy` - Use specific proxy for imported watches
+        - `dedupe` - Skip duplicate URLs (default: true)
+
+        **Type Conversion:**
+        - Booleans: `true`, `false`, `1`, `0`, `yes`, `no`
+        - Arrays: Comma-separated or JSON format (`["item1","item2"]`)
+        - Objects: JSON format (`{"key":"value"}`)
+        - Numbers: Parsed as int or float
      x-code-samples:
        - lang: 'curl'
          source: |
+            # Basic import
            curl -X POST "http://localhost:5000/api/v1/import" \
              -H "x-api-key: YOUR_API_KEY" \
              -H "Content-Type: text/plain" \
              -d $'https://example.com\nhttps://example.org\nhttps://example.net'
+
+            # Import with processor and fetch backend
+            curl -X POST "http://localhost:5000/api/v1/import?processor=restock_diff&fetch_backend=html_webdriver" \
+              -H "x-api-key: YOUR_API_KEY" \
+              -H "Content-Type: text/plain" \
+              -d $'https://example.com\nhttps://example.org'
+
+            # Import with multiple settings
+            curl -X POST "http://localhost:5000/api/v1/import?processor=restock_diff&paused=true&tag=production" \
+              -H "x-api-key: YOUR_API_KEY" \
+              -H "Content-Type: text/plain" \
+              -d $'https://example.com'
        - lang: 'Python'
          source: |
            import requests
-            
+
            headers = {
                'x-api-key': 'YOUR_API_KEY',
                'Content-Type': 'text/plain'
            }
+
+            # Basic import
            urls = 'https://example.com\nhttps://example.org\nhttps://example.net'
-            response = requests.post('http://localhost:5000/api/v1/import', 
+            response = requests.post('http://localhost:5000/api/v1/import',
                                   headers=headers, data=urls)
            print(response.json())
+
+            # Import with configuration
+            params = {
+                'processor': 'restock_diff',
+                'fetch_backend': 'html_webdriver',
+                'paused': 'false',
+                'tag': 'production'
+            }
+            response = requests.post('http://localhost:5000/api/v1/import',
+                                   headers=headers, params=params, data=urls)
+            print(response.json())
      parameters:
        - name: tag_uuids
          in: query
-          description: Tag UUID to apply to imported web page change monitors (watches)
+          description: Tag UUID(s) to apply to imported watches (comma-separated for multiple)
          schema:
            type: string
+          example: "550e8400-e29b-41d4-a716-446655440000"
        - name: tag
          in: query
-          description: Tag name to apply to imported web page change monitors (watches)
+          description: Tag name to apply to imported watches
          schema:
            type: string
+          example: "production"
        - name: proxy
          in: query
-          description: Proxy key to use for imported web page change monitors (watches)
+          description: Proxy key to use for imported watches
          schema:
            type: string
+          example: "proxy1"
        - name: dedupe
          in: query
-          description: Remove duplicate URLs (default true)
+          description: Skip duplicate URLs (default true)
          schema:
            type: boolean
            default: true
@@ -5,7 +5,6 @@ flask-compress
 # 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers)
 flask-login>=0.6.3
 flask-paginate
-flask_expects_json~=1.7
 flask_restful
 flask_cors # For the Chrome extension to operate
 # janus # No longer needed - using pure threading.Queue for multi-loop support
@@ -126,8 +125,8 @@ greenlet >= 3.0.3
 # Default SOCKETIO_MODE=threading is recommended for better compatibility
 gevent

-# Pinned or it causes problems with flask_expects_json which seems unmaintained
-referencing==0.35.1
+# Previously pinned for flask_expects_json (removed 2026-02). Unpinning for now.
+referencing

 # For conditions
 panzi-json-logic
Author	SHA1	Message	Date
dgtlmoon	fffcc9af39	WIP Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-13 15:44:54 +01:00
dgtlmoon	961901c594	WIP	2026-02-13 15:15:34 +01:00
dgtlmoon	340421ea36	Minor cache	2026-02-13 14:58:23 +01:00
dgtlmoon	f29c4c8f5f	WIP	2026-02-13 14:54:34 +01:00
dgtlmoon	9702b6c8a1	Tweak message	2026-02-13 14:52:34 +01:00
dgtlmoon	798fc21f1c	WIP	2026-02-13 14:50:23 +01:00
dgtlmoon	0c6931c07c	WIP	2026-02-13 14:40:43 +01:00
dgtlmoon	60ed2a26ea	WIP	2026-02-13 14:28:56 +01:00
dgtlmoon	490ca0a663	WIP	2026-02-13 11:41:55 +01:00
dgtlmoon	10c9df288a	WIP	2026-02-13 11:24:17 +01:00
dgtlmoon	f54725d292	Increase test coverage	2026-02-13 09:18:28 +01:00
dgtlmoon	acf9e4a1e6	Remove flask_expects_json	2026-02-13 09:10:31 +01:00
dgtlmoon	7ddc0f9be0	Sync API Spec with base model	2026-02-13 09:10:04 +01:00
dgtlmoon	20f11c5c4a	Improve error logging	2026-02-13 08:49:09 +01:00
dgtlmoon	4bc01aca8d	Price tracker - Use a more memory efficient price scraper, use subprocess on linux for cleaner memory management. (#3864 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details CodeQL / Analyze (javascript) (push) Has been cancelled Details CodeQL / Analyze (python) (push) Has been cancelled Details	2026-02-11 17:21:08 +01:00
dgtlmoon	ef41dd304c	Refactoring upgrade path (#3861 )	2026-02-11 16:13:08 +01:00
dgtlmoon	5726c5a0ac	API - Import use background task to import large lists (#3858 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-11 08:15:58 +01:00
dgtlmoon	80f7decf4f	API - Bumping docs	2026-02-11 07:44:45 +01:00
dgtlmoon	c66a29b011	API - Import - Ability to set any watch value as HTTP URL Query value, for example ?processor=restock_diff&time_between_check={'hours':24} Re #3845 (#3857 )	2026-02-11 07:26:48 +01:00
dgtlmoon	a1a2e5c5bf	API - Include missing `tags` in fetching watch information. #3854 (#3856 )	2026-02-11 06:45:19 +01:00