0.53.5

Fixing bad replacement of metadata causing possible content removal #3906 (#3908)
2026-02-20 21:26:08 +00:00 · 2026-02-20 00:57:52 +01:00 · 2026-02-20 00:55:37 +01:00
19 changed files with 247 additions and 598 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
 # Semver means never use .01, or 00. Should be .1.
-__version__ = '0.53.4'
+__version__ = '0.53.5'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/api/Spec.py
+++ b/changedetectionio/api/Spec.py
@@ -1,21 +0,0 @@
-import functools
-from flask import make_response
-from flask_restful import Resource
-
-
-@functools.cache
-def _get_spec_yaml():
-    """Build and cache the merged spec as a YAML string (only serialized once per process)."""
-    import yaml
-    from changedetectionio.api import build_merged_spec_dict
-    return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
-
-
-class Spec(Resource):
-    def get(self):
-        """Return the merged OpenAPI spec including all registered processor extensions."""
-        return make_response(
-            _get_spec_yaml(),
-            200,
-            {'Content-Type': 'application/yaml'}
-        )
--- a/changedetectionio/api/init.py
+++ b/changedetectionio/api/init.py
@@ -3,18 +3,29 @@ from flask import request, abort
 from loguru import logger

@functools.cache
-def build_merged_spec_dict():
+def get_openapi_spec():
+    """Lazy load OpenAPI spec and dependencies only when validation is needed."""
+    import os
+    import yaml  # Lazy import - only loaded when API validation is actually used
+    from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup
+
+    spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
+    if not os.path.exists(spec_path):
+        # Possibly for pip3 packages
+        spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
+
+    with open(spec_path, 'r', encoding='utf-8') as f:
+        spec_dict = yaml.safe_load(f)
+    _openapi_spec = OpenAPI.from_dict(spec_dict)
+    return _openapi_spec
+
+@functools.cache
+def get_openapi_schema_dict():
    """
-    Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
+    Get the raw OpenAPI spec dictionary for schema access.

-    Each processor can provide an api.yaml file alongside its __init__.py that defines
-    additional schemas (e.g., processor_config_restock_diff). These are merged into
-    WatchBase.properties so the spec accurately reflects what the API accepts.
-
-    Plugin processors (via pluggy) are also supported - they just need an api.yaml
-    next to their processor module.
-
-    Returns the merged dict (cached - do not mutate the returned value).
+    Used by Import endpoint to validate and convert query parameters.
+    Returns the YAML dict directly (not the OpenAPI object).
    """
    import os
    import yaml
@@ -24,59 +35,7 @@ def build_merged_spec_dict():
        spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')

    with open(spec_path, 'r', encoding='utf-8') as f:
-        spec_dict = yaml.safe_load(f)
-
-    try:
-        from changedetectionio.processors import find_processors, get_parent_module
-        for module, proc_name in find_processors():
-            parent = get_parent_module(module)
-            if not parent or not hasattr(parent, '__file__'):
-                continue
-            api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
-            if not os.path.exists(api_yaml_path):
-                continue
-            with open(api_yaml_path, 'r', encoding='utf-8') as f:
-                proc_spec = yaml.safe_load(f)
-            # Merge schemas
-            proc_schemas = proc_spec.get('components', {}).get('schemas', {})
-            spec_dict['components']['schemas'].update(proc_schemas)
-            # Inject processor_config_{name} into WatchBase if the schema is defined
-            schema_key = f'processor_config_{proc_name}'
-            if schema_key in proc_schemas:
-                spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
-                    '$ref': f'#/components/schemas/{schema_key}'
-                }
-            # Append x-code-samples from processor paths into existing path operations
-            for path, path_item in proc_spec.get('paths', {}).items():
-                if path not in spec_dict.get('paths', {}):
-                    continue
-                for method, operation in path_item.items():
-                    if method not in spec_dict['paths'][path]:
-                        continue
-                    if 'x-code-samples' in operation:
-                        existing = spec_dict['paths'][path][method].get('x-code-samples', [])
-                        spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
-    except Exception as e:
-        logger.warning(f"Failed to merge processor API specs: {e}")
-
-    return spec_dict
-
-
-@functools.cache
-def get_openapi_spec():
-    """Lazy load OpenAPI spec and dependencies only when validation is needed."""
-    from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup
-    return OpenAPI.from_dict(build_merged_spec_dict())
-
-@functools.cache
-def get_openapi_schema_dict():
-    """
-    Get the raw OpenAPI spec dictionary for schema access.
-
-    Used by Import endpoint to validate and convert query parameters.
-    Returns the merged YAML dict (not the OpenAPI object).
-    """
-    return build_merged_spec_dict()
+        return yaml.safe_load(f)

@functools.cache
 def _resolve_schema_properties(schema_name):
@@ -191,6 +150,5 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
 from .Tags import Tags, Tag
 from .Import import Import
 from .SystemInfo import SystemInfo
-from .Spec import Spec
 from .Notifications import Notifications

--- a/changedetectionio/blueprint/tags/init.py
+++ b/changedetectionio/blueprint/tags/init.py
@@ -160,21 +160,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                       default_system_settings = datastore.data['settings'],
                                       )

-        # Bridge API-stored processor_config_* values into the form's FormField sub-forms.
-        # The API stores processor_config_restock_diff in the tag dict; find the matching
-        # FormField by checking which one's sub-fields cover the config keys.
-        from wtforms.fields.form import FormField as WTFormField
-        for key, value in default.items():
-            if not key.startswith('processor_config_') or not isinstance(value, dict):
-                continue
-            for form_field in form:
-                if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
-                    for sub_key, sub_value in value.items():
-                        sub_field = form_field.form._fields.get(sub_key)
-                        if sub_field is not None:
-                            sub_field.data = sub_value
-                    break
-
        template_args = {
            'data': default,
            'form': form,
--- a/changedetectionio/blueprint/ui/edit.py
+++ b/changedetectionio/blueprint/ui/edit.py
@@ -117,25 +117,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                processor_config = processor_instance.get_extra_watch_config(config_filename)

                if processor_config:
-                    from wtforms.fields.form import FormField
                    # Populate processor-config-* fields from JSON
                    for config_key, config_value in processor_config.items():
-                        if not isinstance(config_value, dict):
-                            continue
-                        # Try exact API-named field first (e.g., processor_config_restock_diff)
-                        target_field = getattr(form, f'processor_config_{config_key}', None)
-                        # Fallback: find any FormField sub-form whose fields cover config_value keys
-                        if target_field is None:
-                            for form_field in form:
-                                if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
-                                    target_field = form_field
-                                    break
-                        if target_field is not None:
-                            for sub_key, sub_value in config_value.items():
-                                sub_field = target_field.form._fields.get(sub_key)
-                                if sub_field is not None:
-                                    sub_field.data = sub_value
-                                    logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
+                        field_name = f'processor_config_{config_key}'
+                        if hasattr(form, field_name):
+                            getattr(form, field_name).data = config_value
+                            logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
            except Exception as e:
                logger.warning(f"Failed to load processor config: {e}")

--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -40,7 +40,7 @@ from loguru import logger

 from changedetectionio import __version__
 from changedetectionio import queuedWatchMetaData
-from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
+from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
 from changedetectionio.api.Search import Search
 from .time_handler import is_within_schedule
 from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
@@ -571,8 +571,6 @@ def changedetection_app(config=None, datastore_o=None):
    watch_api.add_resource(Notifications, '/api/v1/notifications',
                           resource_class_kwargs={'datastore': datastore})

-    watch_api.add_resource(Spec, '/api/v1/full-spec')
-
    @login_manager.user_loader
    def user_loader(email):
        user = User()
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -561,31 +561,33 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
        )
    else:
        parser_config = None
-
    if is_rss:
        html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
        html_content = re.sub(r'</title>', r'</h1>', html_content)
    else:
-        # Strip bloat in one pass, SPA's often dump 10Mb+ into the <head> for styles, which is not needed
-        # Causing inscriptis to silently exit when more than ~10MB is found.
-        # All we are doing here is converting the HTML to text, no CSS layout etc
-        # Use backreference (\1) to ensure opening/closing tags match (prevents <style> matching </svg> in CSS data URIs)
-        html_content = re.sub(r'<(style|script|svg|noscript)[^>]*>.*?</\1>|<(?:link|meta)[^>]*/?>|<!--.*?-->',
-                              '', html_content, flags=re.DOTALL | re.IGNORECASE)
+        # Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into <head>,
+        # causing inscriptis to silently give up. Regex-based stripping is unsafe because tags
+        # can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>),
+        # causing the regex to scan past the intended close and eat real page content.
+        from bs4 import BeautifulSoup
+        soup = BeautifulSoup(html_content, 'html.parser')
+        # Strip tags that inscriptis cannot render as meaningful text and which can be very large.
+        # svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers.
+        # video/audio/picture are kept — they may contain meaningful fallback text or captions.
+        for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg',
+                                  'math', 'canvas', 'iframe', 'template']):
+            tag.decompose()

-        # SPAs often use <body style="display:none"> to hide content until JS loads
-        # inscriptis respects CSS display rules, so we need to remove these hiding styles
-        # to extract the actual page content
-        body_style_pattern = r'(<body[^>]*)\s+style\s*=\s*["\']([^"\']*\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b[^"\']*)["\']'
-
-        # Check if body has hiding styles that need to be fixed
-        body_match = re.search(body_style_pattern, html_content, flags=re.IGNORECASE)
-        if body_match:
-            from loguru import logger
-            logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{body_match.group(2)}')")
-
-        html_content = re.sub(body_style_pattern, r'\1', html_content, flags=re.IGNORECASE)
+        # SPAs often use <body style="display:none"> to hide content until JS loads.
+        # inscriptis respects CSS display rules, so strip hiding styles from the body tag.
+        body_tag = soup.find('body')
+        if body_tag and body_tag.get('style'):
+            style = body_tag['style']
+            if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE):
+                logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')")
+                del body_tag['style']

+        html_content = str(soup)

    text_content = get_text(html_content, config=parser_config)
    return text_content
--- a/changedetectionio/processors/README.md
+++ b/changedetectionio/processors/README.md
@@ -9,15 +9,6 @@ Some suggestions for the future

 - `graphical` 

-## API schema extension (`api.yaml`)
-
-A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
-Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
-making `processor_config_<name>` a valid field on all watch create/update API calls.
-The fully merged spec is served live at `/api/v1/full-spec`.
-
-See `restock_diff/api.yaml` for a working example.
-
 ## Todo

 - Make each processor return a extra list of sub-processed (so you could configure a single processor in different ways)
--- a/changedetectionio/processors/restock_diff/init.py
+++ b/changedetectionio/processors/restock_diff/init.py
@@ -67,6 +67,10 @@ class Watch(BaseWatch):
        super().__init__(*arg, **kw)
        self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()

+        self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
+            'follow_price_changes': True,
+            'in_stock_processing' : 'in_stock_only'
+        } #@todo update

    def clear_watch(self):
        super().clear_watch()
--- a/changedetectionio/processors/restock_diff/api.yaml
+++ b/changedetectionio/processors/restock_diff/api.yaml
@@ -1,149 +0,0 @@
-components:
-  schemas:
-    processor_config_restock_diff:
-      type: object
-      description: Configuration for the restock_diff processor (restock and price tracking)
-      properties:
-        in_stock_processing:
-          type: string
-          enum: [in_stock_only, all_changes, 'off']
-          default: in_stock_only
-          description: |
-            When to trigger on stock changes:
-            - `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
-            - `all_changes`: Trigger on any availability change
-            - `off`: Disable stock/availability tracking
-        follow_price_changes:
-          type: boolean
-          default: true
-          description: Monitor and track price changes
-        price_change_min:
-          type: [number, 'null']
-          description: Trigger a notification when the price drops below this value
-        price_change_max:
-          type: [number, 'null']
-          description: Trigger a notification when the price rises above this value
-        price_change_threshold_percent:
-          type: [number, 'null']
-          minimum: 0
-          maximum: 100
-          description: Minimum price change percentage since the original price to trigger a notification
-
-paths:
-  /watch:
-    post:
-      x-code-samples:
-        - lang: 'curl'
-          label: 'Restock & price tracking'
-          source: |
-            curl -X POST "http://localhost:5000/api/v1/watch" \
-              -H "x-api-key: YOUR_API_KEY" \
-              -H "Content-Type: application/json" \
-              -d '{
-                "url": "https://example.com/product",
-                "processor": "restock_diff",
-                "processor_config_restock_diff": {
-                  "in_stock_processing": "in_stock_only",
-                  "follow_price_changes": true,
-                  "price_change_threshold_percent": 5
-                }
-              }'
-        - lang: 'Python'
-          label: 'Restock & price tracking'
-          source: |
-            import requests
-
-            headers = {
-                'x-api-key': 'YOUR_API_KEY',
-                'Content-Type': 'application/json'
-            }
-            data = {
-                'url': 'https://example.com/product',
-                'processor': 'restock_diff',
-                'processor_config_restock_diff': {
-                    'in_stock_processing': 'in_stock_only',
-                    'follow_price_changes': True,
-                    'price_change_threshold_percent': 5,
-                }
-            }
-            response = requests.post('http://localhost:5000/api/v1/watch',
-                                     headers=headers, json=data)
-            print(response.json())
-
-  /watch/{uuid}:
-    put:
-      x-code-samples:
-        - lang: 'curl'
-          label: 'Update restock config'
-          source: |
-            curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
-              -H "x-api-key: YOUR_API_KEY" \
-              -H "Content-Type: application/json" \
-              -d '{
-                "processor_config_restock_diff": {
-                  "in_stock_processing": "all_changes",
-                  "follow_price_changes": true,
-                  "price_change_min": 10.00,
-                  "price_change_max": 500.00
-                }
-              }'
-        - lang: 'Python'
-          label: 'Update restock config'
-          source: |
-            import requests
-
-            headers = {
-                'x-api-key': 'YOUR_API_KEY',
-                'Content-Type': 'application/json'
-            }
-            uuid = 'YOUR-UUID'
-            data = {
-                'processor_config_restock_diff': {
-                    'in_stock_processing': 'all_changes',
-                    'follow_price_changes': True,
-                    'price_change_min': 10.00,
-                    'price_change_max': 500.00,
-                }
-            }
-            response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
-                                    headers=headers, json=data)
-            print(response.text)
-
-  /tag/{uuid}:
-    put:
-      x-code-samples:
-        - lang: 'curl'
-          label: 'Set restock config on group/tag'
-          source: |
-            curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
-              -H "x-api-key: YOUR_API_KEY" \
-              -H "Content-Type: application/json" \
-              -d '{
-                "overrides_watch": true,
-                "processor_config_restock_diff": {
-                  "in_stock_processing": "in_stock_only",
-                  "follow_price_changes": true,
-                  "price_change_threshold_percent": 10
-                }
-              }'
-        - lang: 'Python'
-          label: 'Set restock config on group/tag'
-          source: |
-            import requests
-
-            headers = {
-                'x-api-key': 'YOUR_API_KEY',
-                'Content-Type': 'application/json'
-            }
-            tag_uuid = 'YOUR-TAG-UUID'
-            data = {
-                'overrides_watch': True,
-                'processor_config_restock_diff': {
-                    'in_stock_processing': 'in_stock_only',
-                    'follow_price_changes': True,
-                    'price_change_threshold_percent': 10,
-                }
-            }
-            response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
-                                    headers=headers, json=data)
-            print(response.text)
--- a/changedetectionio/processors/restock_diff/forms.py
+++ b/changedetectionio/processors/restock_diff/forms.py
@@ -31,7 +31,7 @@ class RestockSettingsForm(Form):
    follow_price_changes = BooleanField(_l('Follow price changes'), default=True)

 class processor_settings_form(processor_text_json_diff_form):
-    processor_config_restock_diff = FormField(RestockSettingsForm)
+    restock_settings = FormField(RestockSettingsForm)

    def extra_tab_content(self):
        return _l('Restock & Price Detection')
@@ -48,34 +48,34 @@ class processor_settings_form(processor_text_json_diff_form):

        output += """
        {% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
-        <script>
+        <script>        
            $(document).ready(function () {
-                toggleOpacity('#processor_config_restock_diff-follow_price_changes', '.price-change-minmax', true);
+                toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true);
            });
        </script>

        <fieldset id="restock-fieldset-price-group">
            <div class="pure-control-group">
                <fieldset class="pure-group inline-radio">
-                    {{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
+                    {{ render_field(form.restock_settings.in_stock_processing) }}
                </fieldset>
                <fieldset class="pure-group">
-                    {{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }}
+                    {{ render_checkbox_field(form.restock_settings.follow_price_changes) }}
                    <span class="pure-form-message-inline">Changes in price should trigger a notification</span>
                </fieldset>
-                <fieldset class="pure-group price-change-minmax">
-                    {{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
+                <fieldset class="pure-group price-change-minmax">               
+                    {{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
                    <span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
                </fieldset>
                <fieldset class="pure-group price-change-minmax">
-                    {{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
+                    {{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
                    <span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
                </fieldset>
                <fieldset class="pure-group price-change-minmax">
-                    {{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
+                    {{ render_field(form.restock_settings.price_change_threshold_percent) }}
                    <span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
                    <span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
-                </fieldset>
+                </fieldset>                
            </div>
        </fieldset>
        """
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -450,18 +450,13 @@ class perform_site_check(difference_detection_processor):
                                            )

        # Which restock settings to compare against?
-        # Settings are stored in restock_diff.json (migrated from watch.json by update_30).
-        _extra_config = self.get_extra_watch_config('restock_diff.json')
-        restock_settings = _extra_config.get('restock_diff') or {
-            'follow_price_changes': True,
-            'in_stock_processing': 'in_stock_only',
-        }
+        restock_settings = watch.get('restock_settings', {})

        # See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
        for tag_uuid in watch.get('tags'):
            tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
            if tag.get('overrides_watch'):
-                restock_settings = tag.get('processor_config_restock_diff') or {}
+                restock_settings = tag.get('restock_settings', {})
                logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
                break

--- a/changedetectionio/store/updates.py
+++ b/changedetectionio/store/updates.py
@@ -730,48 +730,3 @@ class DatastoreUpdatesMixin:
        # (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
        self._save_settings()

-    def update_30(self):
-        """Migrate restock_settings out of watch.json into restock_diff.json processor config file.
-
-        Previously, restock_diff processor settings (in_stock_processing, follow_price_changes, etc.)
-        were stored directly in the watch dict (watch.json). They now belong in a separate per-watch
-        processor config file (restock_diff.json) consistent with the processor_config_* API system.
-
-        For tags: restock_settings key is renamed to processor_config_restock_diff in the tag dict,
-        matching what the API writes when updating a tag.
-
-        Safe to re-run: skips watches that already have a restock_diff.json, skips tags that already
-        have processor_config_restock_diff set.
-        """
-        import json
-
-        # --- Watches ---
-        for uuid, watch in self.data['watching'].items():
-            if watch.get('processor') != 'restock_diff':
-                continue
-            restock_settings = watch.get('restock_settings')
-            if not restock_settings:
-                continue
-
-            data_dir = watch.data_dir
-            if data_dir:
-                watch.ensure_data_dir_exists()
-                filepath = os.path.join(data_dir, 'restock_diff.json')
-                if not os.path.isfile(filepath):
-                    with open(filepath, 'w', encoding='utf-8') as f:
-                        json.dump({'restock_diff': restock_settings}, f, indent=2)
-                    logger.info(f"update_30: migrated restock_settings → {filepath}")
-
-            del self.data['watching'][uuid]['restock_settings']
-            watch.commit()
-
-        # --- Tags ---
-        for tag_uuid, tag in self.data['settings']['application']['tags'].items():
-            restock_settings = tag.get('restock_settings')
-            if not restock_settings or tag.get('processor_config_restock_diff'):
-                continue
-            tag['processor_config_restock_diff'] = restock_settings
-            del tag['restock_settings']
-            tag.commit()
-            logger.info(f"update_30: migrated tag {tag_uuid} restock_settings → processor_config_restock_diff")
-
--- a/changedetectionio/tests/test_api.py
+++ b/changedetectionio/tests/test_api.py
@@ -807,88 +807,6 @@ def test_api_import_large_background(client, live_server, measure_memory_usage,
    print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")


-def test_api_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
-    """
-    Test that processor_config_restock_diff is accepted by the API for watches using
-    restock_diff processor, that its schema is validated (enum values, types), and that
-    genuinely unknown fields are rejected with an error that originates from the
-    OpenAPI spec validation layer.
-    """
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-    test_url = url_for('test_endpoint', _external=True)
-
-    # Create a watch in restock_diff mode WITH processor_config in the POST body (matches the API docs example)
-    res = client.post(
-        url_for("createwatch"),
-        data=json.dumps({
-            "url": test_url,
-            "processor": "restock_diff",
-            "title": "Restock test",
-            "processor_config_restock_diff": {
-                "in_stock_processing": "in_stock_only",
-                "follow_price_changes": True,
-                "price_change_min": 8888888.0,
-            }
-        }),
-        headers={'content-type': 'application/json', 'x-api-key': api_key},
-        follow_redirects=True
-    )
-    assert res.status_code == 201
-    watch_uuid = res.json.get('uuid')
-    assert is_valid_uuid(watch_uuid)
-
-    # Verify the value set on POST is reflected in the UI edit page (not just via PUT)
-    res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
-    assert res.status_code == 200
-    assert b'8888888' in res.data, "price_change_min set via POST should appear in the UI edit form"
-
-    # Valid processor_config_restock_diff update via PUT should also be accepted
-    res = client.put(
-        url_for("watch", uuid=watch_uuid),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        data=json.dumps({
-            "processor_config_restock_diff": {
-                "in_stock_processing": "all_changes",
-                "follow_price_changes": False,
-                "price_change_min": 8888888.0,
-                "price_change_max": 9999999.0,
-            }
-        }),
-    )
-    assert res.status_code == 200, f"Valid processor_config_restock_diff should be accepted, got: {res.data}"
-
-    # Verify the updated value is still reflected in the UI edit page
-    res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
-    assert res.status_code == 200
-    assert b'8888888' in res.data, "price_change_min set via PUT should appear in the UI edit form"
-
-    # An invalid enum value inside processor_config_restock_diff should be rejected by the spec
-    res = client.put(
-        url_for("watch", uuid=watch_uuid),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        data=json.dumps({
-            "processor_config_restock_diff": {
-                "in_stock_processing": "not_a_valid_enum_value"
-            }
-        }),
-    )
-    assert res.status_code == 400, "Invalid enum value in processor config should be rejected"
-    assert b'Validation failed' in res.data, "Rejection should come from OpenAPI spec validation layer"
-
-    # A completely unknown field should be rejected (either by OpenAPI spec validation or
-    # the application-level field filter — both are acceptable gatekeepers)
-    res = client.put(
-        url_for("watch", uuid=watch_uuid),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        data=json.dumps({"field_that_is_not_in_the_spec_at_all": "some value"}),
-    )
-    assert res.status_code == 400, "Unknown fields should be rejected"
-    assert (b'Validation failed' in res.data or b'Unknown field' in res.data), \
-        "Rejection should come from either the OpenAPI spec validation layer or application field filter"
-
-    delete_all_watches(client)
-
-
 def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):


--- a/changedetectionio/tests/test_api_openapi.py
+++ b/changedetectionio/tests/test_api_openapi.py
@@ -12,50 +12,6 @@ from flask import url_for
 from .util import live_server_setup, wait_for_all_checks, delete_all_watches


-def test_openapi_merged_spec_contains_restock_fields():
-    """
-    Unit test: verify that build_merged_spec_dict() correctly merges the
-    restock_diff processor api.yaml into the base spec so that
-    WatchBase.properties includes processor_config_restock_diff with all
-    expected sub-fields.  No live server required.
-    """
-    from changedetectionio.api import build_merged_spec_dict
-
-    spec = build_merged_spec_dict()
-    schemas = spec['components']['schemas']
-
-    # The merged schema for processor_config_restock_diff should exist
-    assert 'processor_config_restock_diff' in schemas, \
-        "processor_config_restock_diff schema missing from merged spec"
-
-    restock_schema = schemas['processor_config_restock_diff']
-    props = restock_schema.get('properties', {})
-
-    expected_fields = {
-        'in_stock_processing',
-        'follow_price_changes',
-        'price_change_min',
-        'price_change_max',
-        'price_change_threshold_percent',
-    }
-    missing = expected_fields - set(props.keys())
-    assert not missing, f"Missing fields in processor_config_restock_diff schema: {missing}"
-
-    # in_stock_processing must be an enum with the three valid values
-    enum_values = set(props['in_stock_processing'].get('enum', []))
-    assert enum_values == {'in_stock_only', 'all_changes', 'off'}, \
-        f"Unexpected enum values for in_stock_processing: {enum_values}"
-
-    # WatchBase.properties must carry a $ref to the restock schema so the
-    # validation middleware can enforce it on every POST/PUT to /watch
-    watchbase_props = schemas['WatchBase']['properties']
-    assert 'processor_config_restock_diff' in watchbase_props, \
-        "processor_config_restock_diff not wired into WatchBase.properties"
-    ref = watchbase_props['processor_config_restock_diff'].get('$ref', '')
-    assert 'processor_config_restock_diff' in ref, \
-        f"WatchBase.processor_config_restock_diff should $ref the schema, got: {ref}"
-
-
 def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage, datastore_path):
    """Test that creating a watch with invalid content-type triggers OpenAPI validation error."""
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
--- a/changedetectionio/tests/test_api_tags.py
+++ b/changedetectionio/tests/test_api_tags.py
@@ -176,76 +176,6 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
    assert res.status_code == 204


-def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
-    """
-    Test that a tag/group can be updated with processor_config_restock_diff via the API.
-    Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
-    """
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    set_original_response(datastore_path=datastore_path)
-
-    # Create a tag
-    res = client.post(
-        url_for("tag"),
-        data=json.dumps({"title": "Restock Group"}),
-        headers={'content-type': 'application/json', 'x-api-key': api_key}
-    )
-    assert res.status_code == 201
-    tag_uuid = res.json.get('uuid')
-
-    # Update tag with valid processor_config_restock_diff
-    res = client.put(
-        url_for("tag", uuid=tag_uuid),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        data=json.dumps({
-            "overrides_watch": True,
-            "processor_config_restock_diff": {
-                "in_stock_processing": "in_stock_only",
-                "follow_price_changes": True,
-                "price_change_min": 8888888
-            }
-        })
-    )
-    assert res.status_code == 200, f"PUT tag with restock config failed: {res.data}"
-
-    # Verify the config was stored via API
-    res = client.get(
-        url_for("tag", uuid=tag_uuid),
-        headers={'x-api-key': api_key}
-    )
-    assert res.status_code == 200
-    tag_data = res.json
-    assert tag_data.get('overrides_watch') == True
-    assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only'
-    assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 8888888
-
-    # Verify the value is also reflected in the UI tag edit page
-    res = client.get(url_for("tags.form_tag_edit", uuid=tag_uuid))
-    assert res.status_code == 200
-    assert b'8888888' in res.data, "price_change_min set via API should appear in the UI tag edit form"
-
-    # Invalid enum value should be rejected by OpenAPI spec validation
-    res = client.put(
-        url_for("tag", uuid=tag_uuid),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        data=json.dumps({
-            "processor_config_restock_diff": {
-                "in_stock_processing": "not_a_valid_value"
-            }
-        })
-    )
-    assert res.status_code == 400
-    assert b'Validation failed' in res.data
-
-    # Clean up
-    res = client.delete(
-        url_for("tag", uuid=tag_uuid),
-        headers={'x-api-key': api_key}
-    )
-    assert res.status_code == 204
-
-
 def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
    """
    Test the full round trip, this way we test the default Model fits back into OpenAPI spec
--- a/changedetectionio/tests/test_restock_itemprop.py
+++ b/changedetectionio/tests/test_restock_itemprop.py
@@ -109,7 +109,7 @@ def test_itemprop_price_change(client, live_server, measure_memory_usage, datast
    set_original_response(props_markup=instock_props[0], price='120.45', datastore_path=datastore_path)
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"processor_config_restock_diff-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
+        data={"restock_settings-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -204,9 +204,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form, datastore_path):
 def test_restock_itemprop_minmax(client, live_server, measure_memory_usage, datastore_path):
    
    extras = {
-        "processor_config_restock_diff-follow_price_changes": "y",
-        "processor_config_restock_diff-price_change_min": 900.0,
-        "processor_config_restock_diff-price_change_max": 1100.10
+        "restock_settings-follow_price_changes": "y",
+        "restock_settings-price_change_min": 900.0,
+        "restock_settings-price_change_max": 1100.10
    }
    _run_test_minmax_limit(client, extra_watch_edit_form=extras, datastore_path=datastore_path)

@@ -223,9 +223,9 @@ def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage, da
    res = client.post(
        url_for("tags.form_tag_edit_submit", uuid="first"),
        data={"name": "test-tag",
-              "processor_config_restock_diff-follow_price_changes": "y",
-              "processor_config_restock_diff-price_change_min": 900.0,
-              "processor_config_restock_diff-price_change_max": 1100.10,
+              "restock_settings-follow_price_changes": "y",
+              "restock_settings-price_change_min": 900.0,
+              "restock_settings-price_change_max": 1100.10,
              "overrides_watch": "y", #overrides_watch should be restock_overrides_watch
              },
        follow_redirects=True
@@ -258,8 +258,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"processor_config_restock_diff-follow_price_changes": "y",
-              "processor_config_restock_diff-price_change_threshold_percent": 5.0,
+        data={"restock_settings-follow_price_changes": "y",
+              "restock_settings-price_change_threshold_percent": 5.0,
              "url": test_url,
              "tags": "",
              "headers": "",
@@ -305,8 +305,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
-        data={"processor_config_restock_diff-follow_price_changes": "y",
-              "processor_config_restock_diff-price_change_threshold_percent": 5.05,
+        data={"restock_settings-follow_price_changes": "y",
+              "restock_settings-price_change_threshold_percent": 5.05,
              "processor": "text_json_diff",
              "url": test_url,
              'fetch_backend': "html_requests",
--- a/changedetectionio/tests/unit/test_html_to_text.py
+++ b/changedetectionio/tests/unit/test_html_to_text.py
@@ -453,6 +453,175 @@ class TestHtmlToText(unittest.TestCase):



+    def test_script_with_closing_tag_in_string_does_not_eat_content(self):
+        """
+        Script tag containing a JS-escaped <\\/script> inside a JS string must not prematurely end the block.
+
+        This is the classic regex failure mode: the old pattern would find the first </script>
+        inside the JS string literal and stop there, leaving the tail of the script block
+        (plus any following content) exposed as raw text. BS4 parses the HTML correctly.
+        """
+        html = '''<html><body>
+<p>Before script</p>
+<script>
+var html = "<div>foo<\\/script><p>bar</p>";
+var also = 1;
+</script>
+<p>AFTER SCRIPT</p>
+</body></html>'''
+
+        text = html_to_text(html)
+        assert 'Before script' in text
+        assert 'AFTER SCRIPT' in text
+        # Script internals must not leak
+        assert 'var html' not in text
+        assert 'var also' not in text
+
+    def test_content_sandwiched_between_multiple_body_scripts(self):
+        """Content between multiple script/style blocks in the body must all survive."""
+        html = '''<html><body>
+<script>var a = 1;</script>
+<p>CONTENT A</p>
+<style>.x { color: red; }</style>
+<p>CONTENT B</p>
+<script>var b = 2;</script>
+<p>CONTENT C</p>
+<style>.y { color: blue; }</style>
+<p>CONTENT D</p>
+</body></html>'''
+
+        text = html_to_text(html)
+        for label in ['CONTENT A', 'CONTENT B', 'CONTENT C', 'CONTENT D']:
+            assert label in text, f"'{label}' was eaten by script/style stripping"
+        assert 'var a' not in text
+        assert 'var b' not in text
+        assert 'color: red' not in text
+        assert 'color: blue' not in text
+
+    def test_unicode_and_international_content_preserved(self):
+        """Non-ASCII content (umlauts, CJK, soft hyphens) must survive stripping."""
+        html = '''<html><body>
+<style>.x{color:red}</style>
+<p>German: Aus\xadge\xadbucht! — ANMELDUNG — Fan\xadday 2026</p>
+<p>Chinese: \u6ce8\u518c</p>
+<p>Japanese: \u767b\u9332</p>
+<p>Korean: \ub4f1\ub85d</p>
+<p>Emoji: \U0001f4e2</p>
+<script>var x = 1;</script>
+</body></html>'''
+
+        text = html_to_text(html)
+        assert 'ANMELDUNG' in text
+        assert '\u6ce8\u518c' in text   # Chinese
+        assert '\u767b\u9332' in text   # Japanese
+        assert '\ub4f1\ub85d' in text   # Korean
+
+    def test_style_with_type_attribute_is_stripped(self):
+        """<style type="text/css"> (with type attribute) must be stripped just like bare <style>."""
+        html = '''<html><body>
+<style type="text/css">.important { display: none; }</style>
+<p>VISIBLE CONTENT</p>
+</body></html>'''
+
+        text = html_to_text(html)
+        assert 'VISIBLE CONTENT' in text
+        assert '.important' not in text
+        assert 'display: none' not in text
+
+    def test_ldjson_script_is_stripped(self):
+        """<script type="application/ld+json"> must be stripped — raw JSON must not appear as text."""
+        html = '''<html><body>
+<script type="application/ld+json">
+{"@type": "Product", "name": "Widget", "price": "9.99"}
+</script>
+<p>PRODUCT PAGE</p>
+</body></html>'''
+
+        text = html_to_text(html)
+        assert 'PRODUCT PAGE' in text
+        assert '@type' not in text
+        assert '"price"' not in text
+
+    def test_inline_svg_is_stripped_entirely(self):
+        """
+        Inline SVG elements in the body are stripped by BS4 before passing to inscriptis.
+        SVGs can be huge (icon libraries, data visualisations) and produce garbage path-data
+        text. The old regex code explicitly stripped <svg>; the BS4 path must do the same.
+        """
+        html = '''<html><body>
+<p>Before SVG</p>
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+    <path d="M14 5L7 12L14 19Z" fill="none"/>
+    <circle cx="12" cy="12" r="10"/>
+</svg>
+<p>After SVG</p>
+</body></html>'''
+
+        text = html_to_text(html)
+        assert 'Before SVG' in text
+        assert 'After SVG' in text
+        assert 'M14 5L7' not in text, "SVG path data should not appear in text output"
+        assert 'viewBox' not in text, "SVG attributes should not appear in text output"
+
+    def test_tag_inside_json_data_attribute_does_not_eat_content(self):
+        """
+        Tags inside JSON data attributes with JS-escaped closing tags must not eat real content.
+
+        Real-world case: Elementor/JetEngine WordPress widgets embed HTML (including SVG icons)
+        inside JSON data attributes like data-slider-atts. The HTML inside is JS-escaped, so
+        closing tags appear as <\\/svg> rather than </svg>.
+
+        The old regex approach would find <svg> inside the attribute value, then fail to find
+        <\\/svg> as a matching close tag, and scan forward to the next real </svg> in the DOM —
+        eating tens of kilobytes of actual page content in the process.
+        """
+        html = '''<!DOCTYPE html>
+<html>
+<head><title>Test</title></head>
+<body>
+<div class="slider" data-slider-atts="{&quot;prevArrow&quot;:&quot;<i class=\\&quot;icon\\&quot;><svg width=\\&quot;24\\&quot; height=\\&quot;24\\&quot; viewBox=\\&quot;0 0 24 24\\&quot; xmlns=\\&quot;http:\\/\\/www.w3.org\\/2000\\/svg\\&quot;><path d=\\&quot;M14 5L7 12L14 19\\&quot;\\/><\\/svg><\\/i>&quot;}">
+</div>
+<div class="content">
+    <h1>IMPORTANT CONTENT</h1>
+    <p>This text must not be eaten by the tag-stripping logic.</p>
+</div>
+<svg><circle cx="50" cy="50" r="40"/></svg>
+</body>
+</html>'''
+
+        text = html_to_text(html)
+
+        assert 'IMPORTANT CONTENT' in text, (
+            "Content after a JS-escaped tag in a data attribute was incorrectly stripped. "
+            "The tag-stripping logic is matching <tag> inside attribute values and scanning "
+            "forward to the next real closing tag in the DOM."
+        )
+        assert 'This text must not be eaten' in text
+
+    def test_script_inside_json_data_attribute_does_not_eat_content(self):
+        """Same issue as above but with <script> embedded in a data attribute with JS-escaped closing tag."""
+        html = '''<!DOCTYPE html>
+<html>
+<head><title>Test</title></head>
+<body>
+<div data-config="{&quot;template&quot;:&quot;<script type=\\&quot;text\\/javascript\\&quot;>var x=1;<\\/script>&quot;}">
+</div>
+<div>
+    <h1>MUST SURVIVE</h1>
+    <p>Real content after the data attribute with embedded script tag.</p>
+</div>
+<script>var real = 1;</script>
+</body>
+</html>'''
+
+        text = html_to_text(html)
+
+        assert 'MUST SURVIVE' in text, (
+            "Content after a JS-escaped <script> in a data attribute was incorrectly stripped."
+        )
+        assert 'Real content after the data attribute' in text
+
+
 if __name__ == '__main__':
    # Can run this file directly for quick testing
    unittest.main()
--- a/docs/api-spec.yaml
+++ b/docs/api-spec.yaml
@@ -108,14 +108,9 @@ tags:
      
  - name: System Information
    description: |
-      Retrieve system status and statistics about your changedetection.io instance, including total watch
+      Retrieve system status and statistics about your changedetection.io instance, including total watch 
      counts, uptime information, and version details.

-  - name: Plugin API Extensions
-    description: |
-      Retrieve the live OpenAPI specification for this instance. Unlike the static spec, this endpoint
-      returns the fully merged spec including schemas for any processor plugins installed on this instance.
-
 components:
  securitySchemes:
    ApiKeyAuth:
@@ -1894,7 +1889,7 @@ paths:
        - lang: 'Python'
          source: |
            import requests
-
+            
            headers = {'x-api-key': 'YOUR_API_KEY'}
            response = requests.get('http://localhost:5000/api/v1/systeminfo', headers=headers)
            print(response.json())
@@ -1910,27 +1905,3 @@ paths:
                tag_count: 5
                uptime: "2 days, 3:45:12"
                version: "0.50.10"
-
-  /full-spec:
-    get:
-      operationId: getFullApiSpec
-      tags: [Plugin API Extensions]
-      summary: Get full live API spec
-      description: |
-        Return the fully merged OpenAPI specification for this instance.
-
-        Unlike the static `api-spec.yaml` shipped with the application, this endpoint returns the
-        spec dynamically merged with any `api.yaml` schemas provided by installed processor plugins.
-        Use this URL with Swagger UI or Redoc to get accurate documentation for your specific install.
-      security: []
-      x-code-samples:
-        - lang: 'curl'
-          source: |
-            curl -X GET "http://localhost:5000/api/v1/full-spec"
-      responses:
-        '200':
-          description: Merged OpenAPI specification in YAML format
-          content:
-            application/yaml:
-              schema:
-                type: string
Author	SHA1	Message	Date
dgtlmoon	4128acf95a	0.53.5 Some checks failed ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Waiting to run Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Waiting to run Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Waiting to run Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Waiting to run Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Waiting to run Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Waiting to run Details Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-20 00:57:52 +01:00
dgtlmoon	7c8d59c795	Fixing bad replacement of metadata causing possible content removal #3906 (#3908 )	2026-02-20 00:55:37 +01:00