Compare commits

..

2 Commits

Author SHA1 Message Date
dgtlmoon
4128acf95a 0.53.5
Some checks failed
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Waiting to run
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Waiting to run
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Waiting to run
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Waiting to run
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Waiting to run
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Waiting to run
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2026-02-20 00:57:52 +01:00
dgtlmoon
7c8d59c795 Fixing bad replacement of metadata causing possible content removal #3906 (#3908) 2026-02-20 00:55:37 +01:00
19 changed files with 247 additions and 598 deletions

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.53.4'
__version__ = '0.53.5'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -1,21 +0,0 @@
import functools
from flask import make_response
from flask_restful import Resource
@functools.cache
def _get_spec_yaml():
"""Build and cache the merged spec as a YAML string (only serialized once per process)."""
import yaml
from changedetectionio.api import build_merged_spec_dict
return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
class Spec(Resource):
def get(self):
"""Return the merged OpenAPI spec including all registered processor extensions."""
return make_response(
_get_spec_yaml(),
200,
{'Content-Type': 'application/yaml'}
)

View File

@@ -3,18 +3,29 @@ from flask import request, abort
from loguru import logger
@functools.cache
def build_merged_spec_dict():
def get_openapi_spec():
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
import os
import yaml # Lazy import - only loaded when API validation is actually used
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
if not os.path.exists(spec_path):
# Possibly for pip3 packages
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
with open(spec_path, 'r', encoding='utf-8') as f:
spec_dict = yaml.safe_load(f)
_openapi_spec = OpenAPI.from_dict(spec_dict)
return _openapi_spec
@functools.cache
def get_openapi_schema_dict():
"""
Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
Get the raw OpenAPI spec dictionary for schema access.
Each processor can provide an api.yaml file alongside its __init__.py that defines
additional schemas (e.g., processor_config_restock_diff). These are merged into
WatchBase.properties so the spec accurately reflects what the API accepts.
Plugin processors (via pluggy) are also supported - they just need an api.yaml
next to their processor module.
Returns the merged dict (cached - do not mutate the returned value).
Used by Import endpoint to validate and convert query parameters.
Returns the YAML dict directly (not the OpenAPI object).
"""
import os
import yaml
@@ -24,59 +35,7 @@ def build_merged_spec_dict():
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
with open(spec_path, 'r', encoding='utf-8') as f:
spec_dict = yaml.safe_load(f)
try:
from changedetectionio.processors import find_processors, get_parent_module
for module, proc_name in find_processors():
parent = get_parent_module(module)
if not parent or not hasattr(parent, '__file__'):
continue
api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
if not os.path.exists(api_yaml_path):
continue
with open(api_yaml_path, 'r', encoding='utf-8') as f:
proc_spec = yaml.safe_load(f)
# Merge schemas
proc_schemas = proc_spec.get('components', {}).get('schemas', {})
spec_dict['components']['schemas'].update(proc_schemas)
# Inject processor_config_{name} into WatchBase if the schema is defined
schema_key = f'processor_config_{proc_name}'
if schema_key in proc_schemas:
spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
'$ref': f'#/components/schemas/{schema_key}'
}
# Append x-code-samples from processor paths into existing path operations
for path, path_item in proc_spec.get('paths', {}).items():
if path not in spec_dict.get('paths', {}):
continue
for method, operation in path_item.items():
if method not in spec_dict['paths'][path]:
continue
if 'x-code-samples' in operation:
existing = spec_dict['paths'][path][method].get('x-code-samples', [])
spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
except Exception as e:
logger.warning(f"Failed to merge processor API specs: {e}")
return spec_dict
@functools.cache
def get_openapi_spec():
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
return OpenAPI.from_dict(build_merged_spec_dict())
@functools.cache
def get_openapi_schema_dict():
"""
Get the raw OpenAPI spec dictionary for schema access.
Used by Import endpoint to validate and convert query parameters.
Returns the merged YAML dict (not the OpenAPI object).
"""
return build_merged_spec_dict()
return yaml.safe_load(f)
@functools.cache
def _resolve_schema_properties(schema_name):
@@ -191,6 +150,5 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
from .Tags import Tags, Tag
from .Import import Import
from .SystemInfo import SystemInfo
from .Spec import Spec
from .Notifications import Notifications

View File

@@ -160,21 +160,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
default_system_settings = datastore.data['settings'],
)
# Bridge API-stored processor_config_* values into the form's FormField sub-forms.
# The API stores processor_config_restock_diff in the tag dict; find the matching
# FormField by checking which one's sub-fields cover the config keys.
from wtforms.fields.form import FormField as WTFormField
for key, value in default.items():
if not key.startswith('processor_config_') or not isinstance(value, dict):
continue
for form_field in form:
if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
for sub_key, sub_value in value.items():
sub_field = form_field.form._fields.get(sub_key)
if sub_field is not None:
sub_field.data = sub_value
break
template_args = {
'data': default,
'form': form,

View File

@@ -117,25 +117,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
processor_config = processor_instance.get_extra_watch_config(config_filename)
if processor_config:
from wtforms.fields.form import FormField
# Populate processor-config-* fields from JSON
for config_key, config_value in processor_config.items():
if not isinstance(config_value, dict):
continue
# Try exact API-named field first (e.g., processor_config_restock_diff)
target_field = getattr(form, f'processor_config_{config_key}', None)
# Fallback: find any FormField sub-form whose fields cover config_value keys
if target_field is None:
for form_field in form:
if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
target_field = form_field
break
if target_field is not None:
for sub_key, sub_value in config_value.items():
sub_field = target_field.form._fields.get(sub_key)
if sub_field is not None:
sub_field.data = sub_value
logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
field_name = f'processor_config_{config_key}'
if hasattr(form, field_name):
getattr(form, field_name).data = config_value
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
except Exception as e:
logger.warning(f"Failed to load processor config: {e}")

View File

@@ -40,7 +40,7 @@ from loguru import logger
from changedetectionio import __version__
from changedetectionio import queuedWatchMetaData
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
from changedetectionio.api.Search import Search
from .time_handler import is_within_schedule
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
@@ -571,8 +571,6 @@ def changedetection_app(config=None, datastore_o=None):
watch_api.add_resource(Notifications, '/api/v1/notifications',
resource_class_kwargs={'datastore': datastore})
watch_api.add_resource(Spec, '/api/v1/full-spec')
@login_manager.user_loader
def user_loader(email):
user = User()

View File

@@ -561,31 +561,33 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
)
else:
parser_config = None
if is_rss:
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
html_content = re.sub(r'</title>', r'</h1>', html_content)
else:
# Strip bloat in one pass, SPA's often dump 10Mb+ into the <head> for styles, which is not needed
# Causing inscriptis to silently exit when more than ~10MB is found.
# All we are doing here is converting the HTML to text, no CSS layout etc
# Use backreference (\1) to ensure opening/closing tags match (prevents <style> matching </svg> in CSS data URIs)
html_content = re.sub(r'<(style|script|svg|noscript)[^>]*>.*?</\1>|<(?:link|meta)[^>]*/?>|<!--.*?-->',
'', html_content, flags=re.DOTALL | re.IGNORECASE)
# Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into <head>,
# causing inscriptis to silently give up. Regex-based stripping is unsafe because tags
# can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>),
# causing the regex to scan past the intended close and eat real page content.
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')
# Strip tags that inscriptis cannot render as meaningful text and which can be very large.
# svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers.
# video/audio/picture are kept — they may contain meaningful fallback text or captions.
for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg',
'math', 'canvas', 'iframe', 'template']):
tag.decompose()
# SPAs often use <body style="display:none"> to hide content until JS loads
# inscriptis respects CSS display rules, so we need to remove these hiding styles
# to extract the actual page content
body_style_pattern = r'(<body[^>]*)\s+style\s*=\s*["\']([^"\']*\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b[^"\']*)["\']'
# Check if body has hiding styles that need to be fixed
body_match = re.search(body_style_pattern, html_content, flags=re.IGNORECASE)
if body_match:
from loguru import logger
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{body_match.group(2)}')")
html_content = re.sub(body_style_pattern, r'\1', html_content, flags=re.IGNORECASE)
# SPAs often use <body style="display:none"> to hide content until JS loads.
# inscriptis respects CSS display rules, so strip hiding styles from the body tag.
body_tag = soup.find('body')
if body_tag and body_tag.get('style'):
style = body_tag['style']
if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE):
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')")
del body_tag['style']
html_content = str(soup)
text_content = get_text(html_content, config=parser_config)
return text_content

View File

@@ -9,15 +9,6 @@ Some suggestions for the future
- `graphical`
## API schema extension (`api.yaml`)
A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
making `processor_config_<name>` a valid field on all watch create/update API calls.
The fully merged spec is served live at `/api/v1/full-spec`.
See `restock_diff/api.yaml` for a working example.
## Todo
- Make each processor return a extra list of sub-processed (so you could configure a single processor in different ways)

View File

@@ -67,6 +67,10 @@ class Watch(BaseWatch):
super().__init__(*arg, **kw)
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
'follow_price_changes': True,
'in_stock_processing' : 'in_stock_only'
} #@todo update
def clear_watch(self):
super().clear_watch()

View File

@@ -1,149 +0,0 @@
components:
schemas:
processor_config_restock_diff:
type: object
description: Configuration for the restock_diff processor (restock and price tracking)
properties:
in_stock_processing:
type: string
enum: [in_stock_only, all_changes, 'off']
default: in_stock_only
description: |
When to trigger on stock changes:
- `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
- `all_changes`: Trigger on any availability change
- `off`: Disable stock/availability tracking
follow_price_changes:
type: boolean
default: true
description: Monitor and track price changes
price_change_min:
type: [number, 'null']
description: Trigger a notification when the price drops below this value
price_change_max:
type: [number, 'null']
description: Trigger a notification when the price rises above this value
price_change_threshold_percent:
type: [number, 'null']
minimum: 0
maximum: 100
description: Minimum price change percentage since the original price to trigger a notification
paths:
/watch:
post:
x-code-samples:
- lang: 'curl'
label: 'Restock & price tracking'
source: |
curl -X POST "http://localhost:5000/api/v1/watch" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/product",
"processor": "restock_diff",
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": true,
"price_change_threshold_percent": 5
}
}'
- lang: 'Python'
label: 'Restock & price tracking'
source: |
import requests
headers = {
'x-api-key': 'YOUR_API_KEY',
'Content-Type': 'application/json'
}
data = {
'url': 'https://example.com/product',
'processor': 'restock_diff',
'processor_config_restock_diff': {
'in_stock_processing': 'in_stock_only',
'follow_price_changes': True,
'price_change_threshold_percent': 5,
}
}
response = requests.post('http://localhost:5000/api/v1/watch',
headers=headers, json=data)
print(response.json())
/watch/{uuid}:
put:
x-code-samples:
- lang: 'curl'
label: 'Update restock config'
source: |
curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"processor_config_restock_diff": {
"in_stock_processing": "all_changes",
"follow_price_changes": true,
"price_change_min": 10.00,
"price_change_max": 500.00
}
}'
- lang: 'Python'
label: 'Update restock config'
source: |
import requests
headers = {
'x-api-key': 'YOUR_API_KEY',
'Content-Type': 'application/json'
}
uuid = 'YOUR-UUID'
data = {
'processor_config_restock_diff': {
'in_stock_processing': 'all_changes',
'follow_price_changes': True,
'price_change_min': 10.00,
'price_change_max': 500.00,
}
}
response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
headers=headers, json=data)
print(response.text)
/tag/{uuid}:
put:
x-code-samples:
- lang: 'curl'
label: 'Set restock config on group/tag'
source: |
curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"overrides_watch": true,
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": true,
"price_change_threshold_percent": 10
}
}'
- lang: 'Python'
label: 'Set restock config on group/tag'
source: |
import requests
headers = {
'x-api-key': 'YOUR_API_KEY',
'Content-Type': 'application/json'
}
tag_uuid = 'YOUR-TAG-UUID'
data = {
'overrides_watch': True,
'processor_config_restock_diff': {
'in_stock_processing': 'in_stock_only',
'follow_price_changes': True,
'price_change_threshold_percent': 10,
}
}
response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
headers=headers, json=data)
print(response.text)

View File

@@ -31,7 +31,7 @@ class RestockSettingsForm(Form):
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
class processor_settings_form(processor_text_json_diff_form):
processor_config_restock_diff = FormField(RestockSettingsForm)
restock_settings = FormField(RestockSettingsForm)
def extra_tab_content(self):
return _l('Restock & Price Detection')
@@ -48,34 +48,34 @@ class processor_settings_form(processor_text_json_diff_form):
output += """
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
<script>
<script>
$(document).ready(function () {
toggleOpacity('#processor_config_restock_diff-follow_price_changes', '.price-change-minmax', true);
toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true);
});
</script>
<fieldset id="restock-fieldset-price-group">
<div class="pure-control-group">
<fieldset class="pure-group inline-radio">
{{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
{{ render_field(form.restock_settings.in_stock_processing) }}
</fieldset>
<fieldset class="pure-group">
{{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }}
{{ render_checkbox_field(form.restock_settings.follow_price_changes) }}
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
<span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
{{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
<span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
</fieldset>
</fieldset>
</div>
</fieldset>
"""

View File

@@ -450,18 +450,13 @@ class perform_site_check(difference_detection_processor):
)
# Which restock settings to compare against?
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
_extra_config = self.get_extra_watch_config('restock_diff.json')
restock_settings = _extra_config.get('restock_diff') or {
'follow_price_changes': True,
'in_stock_processing': 'in_stock_only',
}
restock_settings = watch.get('restock_settings', {})
# See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
for tag_uuid in watch.get('tags'):
tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
if tag.get('overrides_watch'):
restock_settings = tag.get('processor_config_restock_diff') or {}
restock_settings = tag.get('restock_settings', {})
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
break

View File

@@ -730,48 +730,3 @@ class DatastoreUpdatesMixin:
# (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
self._save_settings()
def update_30(self):
"""Migrate restock_settings out of watch.json into restock_diff.json processor config file.
Previously, restock_diff processor settings (in_stock_processing, follow_price_changes, etc.)
were stored directly in the watch dict (watch.json). They now belong in a separate per-watch
processor config file (restock_diff.json) consistent with the processor_config_* API system.
For tags: restock_settings key is renamed to processor_config_restock_diff in the tag dict,
matching what the API writes when updating a tag.
Safe to re-run: skips watches that already have a restock_diff.json, skips tags that already
have processor_config_restock_diff set.
"""
import json
# --- Watches ---
for uuid, watch in self.data['watching'].items():
if watch.get('processor') != 'restock_diff':
continue
restock_settings = watch.get('restock_settings')
if not restock_settings:
continue
data_dir = watch.data_dir
if data_dir:
watch.ensure_data_dir_exists()
filepath = os.path.join(data_dir, 'restock_diff.json')
if not os.path.isfile(filepath):
with open(filepath, 'w', encoding='utf-8') as f:
json.dump({'restock_diff': restock_settings}, f, indent=2)
logger.info(f"update_30: migrated restock_settings → {filepath}")
del self.data['watching'][uuid]['restock_settings']
watch.commit()
# --- Tags ---
for tag_uuid, tag in self.data['settings']['application']['tags'].items():
restock_settings = tag.get('restock_settings')
if not restock_settings or tag.get('processor_config_restock_diff'):
continue
tag['processor_config_restock_diff'] = restock_settings
del tag['restock_settings']
tag.commit()
logger.info(f"update_30: migrated tag {tag_uuid} restock_settings → processor_config_restock_diff")

View File

@@ -807,88 +807,6 @@ def test_api_import_large_background(client, live_server, measure_memory_usage,
print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
def test_api_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
"""
Test that processor_config_restock_diff is accepted by the API for watches using
restock_diff processor, that its schema is validated (enum values, types), and that
genuinely unknown fields are rejected with an error that originates from the
OpenAPI spec validation layer.
"""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
test_url = url_for('test_endpoint', _external=True)
# Create a watch in restock_diff mode WITH processor_config in the POST body (matches the API docs example)
res = client.post(
url_for("createwatch"),
data=json.dumps({
"url": test_url,
"processor": "restock_diff",
"title": "Restock test",
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": True,
"price_change_min": 8888888.0,
}
}),
headers={'content-type': 'application/json', 'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 201
watch_uuid = res.json.get('uuid')
assert is_valid_uuid(watch_uuid)
# Verify the value set on POST is reflected in the UI edit page (not just via PUT)
res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
assert res.status_code == 200
assert b'8888888' in res.data, "price_change_min set via POST should appear in the UI edit form"
# Valid processor_config_restock_diff update via PUT should also be accepted
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"processor_config_restock_diff": {
"in_stock_processing": "all_changes",
"follow_price_changes": False,
"price_change_min": 8888888.0,
"price_change_max": 9999999.0,
}
}),
)
assert res.status_code == 200, f"Valid processor_config_restock_diff should be accepted, got: {res.data}"
# Verify the updated value is still reflected in the UI edit page
res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
assert res.status_code == 200
assert b'8888888' in res.data, "price_change_min set via PUT should appear in the UI edit form"
# An invalid enum value inside processor_config_restock_diff should be rejected by the spec
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"processor_config_restock_diff": {
"in_stock_processing": "not_a_valid_enum_value"
}
}),
)
assert res.status_code == 400, "Invalid enum value in processor config should be rejected"
assert b'Validation failed' in res.data, "Rejection should come from OpenAPI spec validation layer"
# A completely unknown field should be rejected (either by OpenAPI spec validation or
# the application-level field filter — both are acceptable gatekeepers)
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({"field_that_is_not_in_the_spec_at_all": "some value"}),
)
assert res.status_code == 400, "Unknown fields should be rejected"
assert (b'Validation failed' in res.data or b'Unknown field' in res.data), \
"Rejection should come from either the OpenAPI spec validation layer or application field filter"
delete_all_watches(client)
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):

View File

@@ -12,50 +12,6 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
def test_openapi_merged_spec_contains_restock_fields():
"""
Unit test: verify that build_merged_spec_dict() correctly merges the
restock_diff processor api.yaml into the base spec so that
WatchBase.properties includes processor_config_restock_diff with all
expected sub-fields. No live server required.
"""
from changedetectionio.api import build_merged_spec_dict
spec = build_merged_spec_dict()
schemas = spec['components']['schemas']
# The merged schema for processor_config_restock_diff should exist
assert 'processor_config_restock_diff' in schemas, \
"processor_config_restock_diff schema missing from merged spec"
restock_schema = schemas['processor_config_restock_diff']
props = restock_schema.get('properties', {})
expected_fields = {
'in_stock_processing',
'follow_price_changes',
'price_change_min',
'price_change_max',
'price_change_threshold_percent',
}
missing = expected_fields - set(props.keys())
assert not missing, f"Missing fields in processor_config_restock_diff schema: {missing}"
# in_stock_processing must be an enum with the three valid values
enum_values = set(props['in_stock_processing'].get('enum', []))
assert enum_values == {'in_stock_only', 'all_changes', 'off'}, \
f"Unexpected enum values for in_stock_processing: {enum_values}"
# WatchBase.properties must carry a $ref to the restock schema so the
# validation middleware can enforce it on every POST/PUT to /watch
watchbase_props = schemas['WatchBase']['properties']
assert 'processor_config_restock_diff' in watchbase_props, \
"processor_config_restock_diff not wired into WatchBase.properties"
ref = watchbase_props['processor_config_restock_diff'].get('$ref', '')
assert 'processor_config_restock_diff' in ref, \
f"WatchBase.processor_config_restock_diff should $ref the schema, got: {ref}"
def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage, datastore_path):
"""Test that creating a watch with invalid content-type triggers OpenAPI validation error."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

View File

@@ -176,76 +176,6 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
assert res.status_code == 204
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
"""
Test that a tag/group can be updated with processor_config_restock_diff via the API.
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
"""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
set_original_response(datastore_path=datastore_path)
# Create a tag
res = client.post(
url_for("tag"),
data=json.dumps({"title": "Restock Group"}),
headers={'content-type': 'application/json', 'x-api-key': api_key}
)
assert res.status_code == 201
tag_uuid = res.json.get('uuid')
# Update tag with valid processor_config_restock_diff
res = client.put(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"overrides_watch": True,
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": True,
"price_change_min": 8888888
}
})
)
assert res.status_code == 200, f"PUT tag with restock config failed: {res.data}"
# Verify the config was stored via API
res = client.get(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key}
)
assert res.status_code == 200
tag_data = res.json
assert tag_data.get('overrides_watch') == True
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only'
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 8888888
# Verify the value is also reflected in the UI tag edit page
res = client.get(url_for("tags.form_tag_edit", uuid=tag_uuid))
assert res.status_code == 200
assert b'8888888' in res.data, "price_change_min set via API should appear in the UI tag edit form"
# Invalid enum value should be rejected by OpenAPI spec validation
res = client.put(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"processor_config_restock_diff": {
"in_stock_processing": "not_a_valid_value"
}
})
)
assert res.status_code == 400
assert b'Validation failed' in res.data
# Clean up
res = client.delete(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key}
)
assert res.status_code == 204
def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
"""
Test the full round trip, this way we test the default Model fits back into OpenAPI spec

View File

@@ -109,7 +109,7 @@ def test_itemprop_price_change(client, live_server, measure_memory_usage, datast
set_original_response(props_markup=instock_props[0], price='120.45', datastore_path=datastore_path)
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"processor_config_restock_diff-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
data={"restock_settings-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -204,9 +204,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form, datastore_path):
def test_restock_itemprop_minmax(client, live_server, measure_memory_usage, datastore_path):
extras = {
"processor_config_restock_diff-follow_price_changes": "y",
"processor_config_restock_diff-price_change_min": 900.0,
"processor_config_restock_diff-price_change_max": 1100.10
"restock_settings-follow_price_changes": "y",
"restock_settings-price_change_min": 900.0,
"restock_settings-price_change_max": 1100.10
}
_run_test_minmax_limit(client, extra_watch_edit_form=extras, datastore_path=datastore_path)
@@ -223,9 +223,9 @@ def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage, da
res = client.post(
url_for("tags.form_tag_edit_submit", uuid="first"),
data={"name": "test-tag",
"processor_config_restock_diff-follow_price_changes": "y",
"processor_config_restock_diff-price_change_min": 900.0,
"processor_config_restock_diff-price_change_max": 1100.10,
"restock_settings-follow_price_changes": "y",
"restock_settings-price_change_min": 900.0,
"restock_settings-price_change_max": 1100.10,
"overrides_watch": "y", #overrides_watch should be restock_overrides_watch
},
follow_redirects=True
@@ -258,8 +258,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"processor_config_restock_diff-follow_price_changes": "y",
"processor_config_restock_diff-price_change_threshold_percent": 5.0,
data={"restock_settings-follow_price_changes": "y",
"restock_settings-price_change_threshold_percent": 5.0,
"url": test_url,
"tags": "",
"headers": "",
@@ -305,8 +305,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid),
data={"processor_config_restock_diff-follow_price_changes": "y",
"processor_config_restock_diff-price_change_threshold_percent": 5.05,
data={"restock_settings-follow_price_changes": "y",
"restock_settings-price_change_threshold_percent": 5.05,
"processor": "text_json_diff",
"url": test_url,
'fetch_backend': "html_requests",

View File

@@ -453,6 +453,175 @@ class TestHtmlToText(unittest.TestCase):
def test_script_with_closing_tag_in_string_does_not_eat_content(self):
"""
Script tag containing </script> inside a JS string must not prematurely end the block.
This is the classic regex failure mode: the old pattern would find the first </script>
inside the JS string literal and stop there, leaving the tail of the script block
(plus any following content) exposed as raw text. BS4 parses the HTML correctly.
"""
html = '''<html><body>
<p>Before script</p>
<script>
var html = "<div>foo<\\/script><p>bar</p>";
var also = 1;
</script>
<p>AFTER SCRIPT</p>
</body></html>'''
text = html_to_text(html)
assert 'Before script' in text
assert 'AFTER SCRIPT' in text
# Script internals must not leak
assert 'var html' not in text
assert 'var also' not in text
def test_content_sandwiched_between_multiple_body_scripts(self):
"""Content between multiple script/style blocks in the body must all survive."""
html = '''<html><body>
<script>var a = 1;</script>
<p>CONTENT A</p>
<style>.x { color: red; }</style>
<p>CONTENT B</p>
<script>var b = 2;</script>
<p>CONTENT C</p>
<style>.y { color: blue; }</style>
<p>CONTENT D</p>
</body></html>'''
text = html_to_text(html)
for label in ['CONTENT A', 'CONTENT B', 'CONTENT C', 'CONTENT D']:
assert label in text, f"'{label}' was eaten by script/style stripping"
assert 'var a' not in text
assert 'var b' not in text
assert 'color: red' not in text
assert 'color: blue' not in text
def test_unicode_and_international_content_preserved(self):
"""Non-ASCII content (umlauts, CJK, soft hyphens) must survive stripping."""
html = '''<html><body>
<style>.x{color:red}</style>
<p>German: Aus\xadge\xadbucht! — ANMELDUNG — Fan\xadday 2026</p>
<p>Chinese: \u6ce8\u518c</p>
<p>Japanese: \u767b\u9332</p>
<p>Korean: \ub4f1\ub85d</p>
<p>Emoji: \U0001f4e2</p>
<script>var x = 1;</script>
</body></html>'''
text = html_to_text(html)
assert 'ANMELDUNG' in text
assert '\u6ce8\u518c' in text # Chinese
assert '\u767b\u9332' in text # Japanese
assert '\ub4f1\ub85d' in text # Korean
def test_style_with_type_attribute_is_stripped(self):
"""<style type="text/css"> (with type attribute) must be stripped just like bare <style>."""
html = '''<html><body>
<style type="text/css">.important { display: none; }</style>
<p>VISIBLE CONTENT</p>
</body></html>'''
text = html_to_text(html)
assert 'VISIBLE CONTENT' in text
assert '.important' not in text
assert 'display: none' not in text
def test_ldjson_script_is_stripped(self):
"""<script type="application/ld+json"> must be stripped — raw JSON must not appear as text."""
html = '''<html><body>
<script type="application/ld+json">
{"@type": "Product", "name": "Widget", "price": "9.99"}
</script>
<p>PRODUCT PAGE</p>
</body></html>'''
text = html_to_text(html)
assert 'PRODUCT PAGE' in text
assert '@type' not in text
assert '"price"' not in text
def test_inline_svg_is_stripped_entirely(self):
"""
Inline SVG elements in the body are stripped by BS4 before passing to inscriptis.
SVGs can be huge (icon libraries, data visualisations) and produce garbage path-data
text. The old regex code explicitly stripped <svg>; the BS4 path must do the same.
"""
html = '''<html><body>
<p>Before SVG</p>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M14 5L7 12L14 19Z" fill="none"/>
<circle cx="12" cy="12" r="10"/>
</svg>
<p>After SVG</p>
</body></html>'''
text = html_to_text(html)
assert 'Before SVG' in text
assert 'After SVG' in text
assert 'M14 5L7' not in text, "SVG path data should not appear in text output"
assert 'viewBox' not in text, "SVG attributes should not appear in text output"
def test_tag_inside_json_data_attribute_does_not_eat_content(self):
"""
Tags inside JSON data attributes with JS-escaped closing tags must not eat real content.
Real-world case: Elementor/JetEngine WordPress widgets embed HTML (including SVG icons)
inside JSON data attributes like data-slider-atts. The HTML inside is JS-escaped, so
closing tags appear as <\\/svg> rather than </svg>.
The old regex approach would find <svg> inside the attribute value, then fail to find
<\/svg> as a matching close tag, and scan forward to the next real </svg> in the DOM —
eating tens of kilobytes of actual page content in the process.
"""
html = '''<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<div class="slider" data-slider-atts="{&quot;prevArrow&quot;:&quot;<i class=\\&quot;icon\\&quot;><svg width=\\&quot;24\\&quot; height=\\&quot;24\\&quot; viewBox=\\&quot;0 0 24 24\\&quot; xmlns=\\&quot;http:\\/\\/www.w3.org\\/2000\\/svg\\&quot;><path d=\\&quot;M14 5L7 12L14 19\\&quot;\\/><\\/svg><\\/i>&quot;}">
</div>
<div class="content">
<h1>IMPORTANT CONTENT</h1>
<p>This text must not be eaten by the tag-stripping logic.</p>
</div>
<svg><circle cx="50" cy="50" r="40"/></svg>
</body>
</html>'''
text = html_to_text(html)
assert 'IMPORTANT CONTENT' in text, (
"Content after a JS-escaped tag in a data attribute was incorrectly stripped. "
"The tag-stripping logic is matching <tag> inside attribute values and scanning "
"forward to the next real closing tag in the DOM."
)
assert 'This text must not be eaten' in text
def test_script_inside_json_data_attribute_does_not_eat_content(self):
"""Same issue as above but with <script> embedded in a data attribute with JS-escaped closing tag."""
html = '''<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<div data-config="{&quot;template&quot;:&quot;<script type=\\&quot;text\\/javascript\\&quot;>var x=1;<\\/script>&quot;}">
</div>
<div>
<h1>MUST SURVIVE</h1>
<p>Real content after the data attribute with embedded script tag.</p>
</div>
<script>var real = 1;</script>
</body>
</html>'''
text = html_to_text(html)
assert 'MUST SURVIVE' in text, (
"Content after a JS-escaped <script> in a data attribute was incorrectly stripped."
)
assert 'Real content after the data attribute' in text
if __name__ == '__main__':
# Can run this file directly for quick testing
unittest.main()

View File

@@ -108,14 +108,9 @@ tags:
- name: System Information
description: |
Retrieve system status and statistics about your changedetection.io instance, including total watch
Retrieve system status and statistics about your changedetection.io instance, including total watch
counts, uptime information, and version details.
- name: Plugin API Extensions
description: |
Retrieve the live OpenAPI specification for this instance. Unlike the static spec, this endpoint
returns the fully merged spec including schemas for any processor plugins installed on this instance.
components:
securitySchemes:
ApiKeyAuth:
@@ -1894,7 +1889,7 @@ paths:
- lang: 'Python'
source: |
import requests
headers = {'x-api-key': 'YOUR_API_KEY'}
response = requests.get('http://localhost:5000/api/v1/systeminfo', headers=headers)
print(response.json())
@@ -1910,27 +1905,3 @@ paths:
tag_count: 5
uptime: "2 days, 3:45:12"
version: "0.50.10"
/full-spec:
get:
operationId: getFullApiSpec
tags: [Plugin API Extensions]
summary: Get full live API spec
description: |
Return the fully merged OpenAPI specification for this instance.
Unlike the static `api-spec.yaml` shipped with the application, this endpoint returns the
spec dynamically merged with any `api.yaml` schemas provided by installed processor plugins.
Use this URL with Swagger UI or Redoc to get accurate documentation for your specific install.
security: []
x-code-samples:
- lang: 'curl'
source: |
curl -X GET "http://localhost:5000/api/v1/full-spec"
responses:
'200':
description: Merged OpenAPI specification in YAML format
content:
application/yaml:
schema:
type: string