mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-05-29 13:01:08 +00:00
Better support for watch API private vars
This commit is contained in:
@@ -7,7 +7,7 @@ import threading
|
||||
from flask import request
|
||||
from . import auth
|
||||
|
||||
from . import validate_openapi_request
|
||||
from . import validate_openapi_request, strip_internal_api_fields
|
||||
|
||||
|
||||
class Tag(Resource):
|
||||
@@ -85,7 +85,8 @@ class Tag(Resource):
|
||||
# Create clean tag dict without Watch-specific fields
|
||||
clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}
|
||||
|
||||
return clean_tag
|
||||
# Never expose `__`-prefixed transient/internal fields
|
||||
return strip_internal_api_fields(clean_tag)
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteTag')
|
||||
@@ -113,8 +114,9 @@ class Tag(Resource):
|
||||
if not tag:
|
||||
abort(404, message='No tag exists with the UUID of {}'.format(uuid))
|
||||
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
# Make a mutable copy of request.json for modification.
|
||||
# Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
|
||||
json_data = strip_internal_api_fields(dict(request.json))
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in json_data:
|
||||
@@ -162,7 +164,8 @@ class Tag(Resource):
|
||||
def post(self):
|
||||
"""Create a single tag/group."""
|
||||
|
||||
json_data = request.get_json()
|
||||
# Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
|
||||
json_data = strip_internal_api_fields(request.get_json())
|
||||
title = json_data.get("title",'').strip()
|
||||
|
||||
# Validate that only valid fields are provided
|
||||
|
||||
@@ -12,7 +12,7 @@ from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
import copy
|
||||
|
||||
from . import validate_openapi_request, get_readonly_watch_fields
|
||||
from . import validate_openapi_request, get_readonly_watch_fields, strip_internal_api_fields
|
||||
from ..notification import valid_notification_formats
|
||||
from ..notification.handler import newline_re
|
||||
|
||||
@@ -126,7 +126,8 @@ class Watch(Resource):
|
||||
watch['processor_config_restock_diff'] = restock_config
|
||||
watch['processor_config_restock_diff_source'] = restock_source
|
||||
|
||||
return watch
|
||||
# Never expose `__`-prefixed transient/internal fields (e.g. __check_status)
|
||||
return strip_internal_api_fields(watch)
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteWatch')
|
||||
@@ -187,8 +188,10 @@ class Watch(Resource):
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
from changedetectionio import processors
|
||||
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
# Make a mutable copy of request.json for modification.
|
||||
# Silently discard `__`-prefixed transient/internal keys — they are not part of the
|
||||
# public schema and must never be writable (e.g. clients that round-trip GET → PUT).
|
||||
json_data = strip_internal_api_fields(dict(request.json))
|
||||
|
||||
# Extract and remove processor config fields from json_data
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
|
||||
@@ -443,7 +446,8 @@ class CreateWatch(Resource):
|
||||
def post(self):
|
||||
"""Create a single watch."""
|
||||
|
||||
json_data = request.get_json()
|
||||
# Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
|
||||
json_data = strip_internal_api_fields(request.get_json())
|
||||
url = json_data['url'].strip()
|
||||
|
||||
if not is_safe_valid_url(url):
|
||||
|
||||
@@ -133,6 +133,43 @@ def get_tag_schema_properties():
|
||||
"""
|
||||
return _resolve_schema_properties('Tag')
|
||||
|
||||
def strip_private_keys(data):
|
||||
"""
|
||||
Remove `__`-prefixed keys from a watch/tag dict at the API boundary.
|
||||
|
||||
These are transient in-memory fields (e.g. `__check_status` set by the worker to
|
||||
surface "Fetching page..." in the UI) and are not part of the public OpenAPI
|
||||
contract. They must never appear in GET responses (otherwise a client that
|
||||
round-trips GET → PUT trips the unknown-field validator), and must be silently
|
||||
discarded from incoming PUT/POST payloads.
|
||||
|
||||
Returns a new dict; the input is not mutated.
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
return data
|
||||
return {k: v for k, v in data.items() if not (isinstance(k, str) and k.startswith('__'))}
|
||||
|
||||
|
||||
def strip_internal_api_fields(data):
|
||||
"""
|
||||
Strip both `__`-prefixed keys AND system-managed fields that aren't in the public
|
||||
OpenAPI spec (skip-cache hashes, LLM runtime state, processor-set status, etc.).
|
||||
|
||||
Use this at every public API boundary so GET responses and PUT/POST payloads agree
|
||||
on what's part of the contract. The set of system-managed fields lives in
|
||||
model/schema_utils.py:SYSTEM_MANAGED_NON_SPEC_FIELDS — extend it there, not here.
|
||||
|
||||
Returns a new dict; the input is not mutated.
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
return data
|
||||
from changedetectionio.model.schema_utils import SYSTEM_MANAGED_NON_SPEC_FIELDS
|
||||
return {
|
||||
k: v for k, v in data.items()
|
||||
if not (isinstance(k, str) and (k.startswith('__') or k in SYSTEM_MANAGED_NON_SPEC_FIELDS))
|
||||
}
|
||||
|
||||
|
||||
def validate_openapi_request(operation_id):
|
||||
"""Decorator to validate incoming requests against OpenAPI spec."""
|
||||
def decorator(f):
|
||||
|
||||
@@ -343,28 +343,14 @@ class watch_base(dict):
|
||||
return
|
||||
|
||||
# Import from shared schema utilities (no circular dependency)
|
||||
from .schema_utils import get_readonly_watch_fields
|
||||
readonly_fields = get_readonly_watch_fields()
|
||||
from .schema_utils import get_readonly_watch_fields, SYSTEM_MANAGED_NON_SPEC_FIELDS
|
||||
|
||||
# Additional system-managed fields not in OpenAPI spec (yet)
|
||||
# These are set by processors/workers and should not trigger edited flag
|
||||
additional_system_fields = {
|
||||
'last_check_status', # Set by processors
|
||||
'last_filter_config_hash', # Set by text_json_diff processor, internal skip-cache
|
||||
'restock', # Set by restock processor
|
||||
'last_viewed', # Set by mark_all_viewed endpoint
|
||||
# LLM runtime fields written back by worker/evaluator
|
||||
'_llm_result',
|
||||
'_llm_intent',
|
||||
'_llm_change_summary',
|
||||
'llm_prefilter',
|
||||
'llm_evaluation_cache',
|
||||
'llm_last_tokens_used',
|
||||
'llm_tokens_used_cumulative',
|
||||
}
|
||||
|
||||
# Only mark as edited if this is a user-writable field
|
||||
if key not in readonly_fields and key not in additional_system_fields:
|
||||
# `last_viewed` is set internally by mark_all_viewed and shouldn't flag the watch as
|
||||
# edited, but is not in SYSTEM_MANAGED_NON_SPEC_FIELDS because it IS user-writable via
|
||||
# the UpdateWatch schema (the API path).
|
||||
if (key not in get_readonly_watch_fields()
|
||||
and key != 'last_viewed'
|
||||
and key not in SYSTEM_MANAGED_NON_SPEC_FIELDS):
|
||||
self.__watch_was_edited = True
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
|
||||
@@ -8,6 +8,35 @@ Shared by both the model layer and API layer to avoid circular dependencies.
|
||||
import functools
|
||||
|
||||
|
||||
# Watch fields written by workers/processors that are NOT part of the public OpenAPI spec.
|
||||
#
|
||||
# These fields exist on a watch dict at runtime but are internal implementation details
|
||||
# (skip-cache hashes, last-check status strings, LLM runtime state, etc.). Used by:
|
||||
# - model/__init__.py: don't trigger the "edited" flag when these are written internally
|
||||
# - api/Watch.py: strip from GET responses and silently discard from PUT/POST inputs
|
||||
# so that a GET → PUT round trip doesn't trip the unknown-field validator
|
||||
#
|
||||
# `last_viewed` is intentionally NOT included: it's set internally by mark_all_viewed BUT
|
||||
# is also explicitly writable via the UpdateWatch schema (see api/Watch.py valid_fields).
|
||||
SYSTEM_MANAGED_NON_SPEC_FIELDS = frozenset({
|
||||
'last_check_status', # Set by processors
|
||||
'last_filter_config_hash', # text_json_diff internal skip-cache
|
||||
'restock', # Set by restock processor
|
||||
'_llm_result', # LLM runtime — populated by evaluator
|
||||
'_llm_intent',
|
||||
'_llm_change_summary',
|
||||
'llm_prefilter',
|
||||
'llm_evaluation_cache',
|
||||
'llm_last_tokens_used',
|
||||
'llm_tokens_used_cumulative',
|
||||
})
|
||||
|
||||
|
||||
def get_system_managed_non_spec_fields():
|
||||
"""Return the set of internal fields not in the public OpenAPI spec."""
|
||||
return SYSTEM_MANAGED_NON_SPEC_FIELDS
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
|
||||
@@ -406,6 +406,106 @@ def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path
|
||||
"extract_lines_containing should be persisted and returned via API"
|
||||
|
||||
|
||||
def test_api_strips_internal_fields(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Internal/transient fields must never cross the API boundary in either direction:
|
||||
1. `__`-prefixed keys (e.g. `__check_status` set by the worker for UI status)
|
||||
2. System-managed fields not in the OpenAPI spec (see SYSTEM_MANAGED_NON_SPEC_FIELDS):
|
||||
`last_check_status`, `last_filter_config_hash`, `_llm_*`, `llm_*`, etc.
|
||||
|
||||
GET responses must strip them. PUT/POST payloads must silently discard them.
|
||||
Without this, a client that round-trips GET → PUT trips the unknown-field validator.
|
||||
"""
|
||||
from changedetectionio.model.schema_utils import SYSTEM_MANAGED_NON_SPEC_FIELDS
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": test_url}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 201
|
||||
watch_uuid = res.json.get('uuid')
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Force both a transient __-prefixed and a system-managed field onto the watch,
|
||||
# simulating worker/processor-set state.
|
||||
watch_obj = datastore.data['watching'][watch_uuid]
|
||||
watch_obj['__check_status'] = 'Fetching page..'
|
||||
watch_obj['last_check_status'] = 200
|
||||
watch_obj['_llm_result'] = {'summary': 'cached llm output'}
|
||||
watch_obj['last_filter_config_hash'] = 'abc123'
|
||||
|
||||
# --- GET must strip all internal fields ---
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert not any(k.startswith('__') for k in res.json.keys()), \
|
||||
f"No __-prefixed field should leak into API responses; got keys: {list(res.json.keys())}"
|
||||
leaked_system_fields = SYSTEM_MANAGED_NON_SPEC_FIELDS & set(res.json.keys())
|
||||
assert not leaked_system_fields, \
|
||||
f"System-managed non-spec fields must not appear in GET response; leaked: {leaked_system_fields}"
|
||||
|
||||
# --- PUT must accept (and silently drop) those same internal fields ---
|
||||
# This is the key round-trip property: a client should be able to PUT back what it just GET'd.
|
||||
# Use the actual GET response as the payload (the realistic round-trip case).
|
||||
payload = dict(res.json)
|
||||
payload['__check_status'] = 'attacker-supplied value' # not in the GET, but a client could add it
|
||||
payload['last_check_status'] = 999 # ditto
|
||||
payload['_llm_result'] = 'attacker overwrite'
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps(payload),
|
||||
)
|
||||
assert res.status_code == 200, \
|
||||
f"PUT round-tripping GET response plus internal fields should succeed (got {res.status_code}: {res.data!r})"
|
||||
|
||||
# Internal fields must not have been overwritten by the PUT
|
||||
assert watch_obj.get('__check_status') == 'Fetching page..', \
|
||||
"PUT must not overwrite __-prefixed fields"
|
||||
assert watch_obj.get('_llm_result') == {'summary': 'cached llm output'}, \
|
||||
"PUT must not overwrite system-managed non-spec fields"
|
||||
|
||||
# --- POST must also silently discard internal fields ---
|
||||
# Use unique sentinel values so we can distinguish "POST persisted my value" from
|
||||
# "the worker concurrently re-set the field while processing the new watch".
|
||||
attacker_check_status = 'attacker-sentinel-__check_status-9f7c'
|
||||
attacker_llm_result = 'attacker-sentinel-_llm_result-9f7c'
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url + "?2",
|
||||
"__check_status": attacker_check_status,
|
||||
"_llm_result": attacker_llm_result,
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True,
|
||||
)
|
||||
assert res.status_code == 201, \
|
||||
f"POST with internal fields should succeed (got {res.status_code}: {res.data!r})"
|
||||
new_uuid = res.json.get('uuid')
|
||||
new_watch = datastore.data['watching'][new_uuid]
|
||||
# If POST had persisted the attacker payload these specific sentinel values would remain.
|
||||
# The worker may legitimately re-set __check_status with its own status string, that's fine.
|
||||
assert new_watch.get('__check_status') != attacker_check_status, \
|
||||
"POST must not persist __-prefixed fields from input"
|
||||
assert new_watch.get('_llm_result') != attacker_llm_result, \
|
||||
"POST must not persist system-managed fields from input"
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_access_denied(client, live_server, measure_memory_usage, datastore_path):
|
||||
# `config_api_token_enabled` Should be On by default
|
||||
res = client.get(
|
||||
|
||||
Reference in New Issue
Block a user