Better support for watch API private vars

LLM - Fixing summary cache miss (#4136)
LLM - UI - Message that 'AI Intent' (triggers) need a bigger model
2026-05-31 05:51:25 +00:00 · 2026-05-15 12:21:18 +02:00 · 2026-05-12 17:44:36 +02:00 · 2026-05-12 17:40:54 +02:00 · 2026-05-12 17:36:39 +02:00 · 2026-05-12 17:28:43 +02:00
28 changed files with 460 additions and 95 deletions
@@ -7,7 +7,7 @@ import threading
 from flask import request
 from . import auth

-from . import validate_openapi_request
+from . import validate_openapi_request, strip_internal_api_fields


 class Tag(Resource):
@@ -85,7 +85,8 @@ class Tag(Resource):
        # Create clean tag dict without Watch-specific fields
        clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}

-        return clean_tag
+        # Never expose `__`-prefixed transient/internal fields
+        return strip_internal_api_fields(clean_tag)

    @auth.check_token
    @validate_openapi_request('deleteTag')
@@ -113,8 +114,9 @@ class Tag(Resource):
        if not tag:
            abort(404, message='No tag exists with the UUID of {}'.format(uuid))

-        # Make a mutable copy of request.json for modification
-        json_data = dict(request.json)
+        # Make a mutable copy of request.json for modification.
+        # Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
+        json_data = strip_internal_api_fields(dict(request.json))

        # Validate notification_urls if provided
        if 'notification_urls' in json_data:
@@ -162,7 +164,8 @@ class Tag(Resource):
    def post(self):
        """Create a single tag/group."""

-        json_data = request.get_json()
+        # Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
+        json_data = strip_internal_api_fields(request.get_json())
        title = json_data.get("title",'').strip()

        # Validate that only valid fields are provided
@@ -12,7 +12,7 @@ from flask_restful import abort, Resource
 from loguru import logger
 import copy

-from . import validate_openapi_request, get_readonly_watch_fields
+from . import validate_openapi_request, get_readonly_watch_fields, strip_internal_api_fields
 from ..notification import valid_notification_formats
 from ..notification.handler import newline_re

@@ -126,7 +126,8 @@ class Watch(Resource):
        watch['processor_config_restock_diff'] = restock_config
        watch['processor_config_restock_diff_source'] = restock_source

-        return watch
+        # Never expose `__`-prefixed transient/internal fields (e.g. __check_status)
+        return strip_internal_api_fields(watch)

    @auth.check_token
    @validate_openapi_request('deleteWatch')
@@ -187,8 +188,10 @@ class Watch(Resource):
        # Handle processor-config-* fields separately (save to JSON, not datastore)
        from changedetectionio import processors

-        # Make a mutable copy of request.json for modification
-        json_data = dict(request.json)
+        # Make a mutable copy of request.json for modification.
+        # Silently discard `__`-prefixed transient/internal keys — they are not part of the
+        # public schema and must never be writable (e.g. clients that round-trip GET → PUT).
+        json_data = strip_internal_api_fields(dict(request.json))

        # Extract and remove processor config fields from json_data
        processor_config_data = processors.extract_processor_config_from_form_data(json_data)
@@ -443,7 +446,8 @@ class CreateWatch(Resource):
    def post(self):
        """Create a single watch."""

-        json_data = request.get_json()
+        # Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
+        json_data = strip_internal_api_fields(request.get_json())
        url = json_data['url'].strip()

        if not is_safe_valid_url(url):
@@ -133,6 +133,43 @@ def get_tag_schema_properties():
    """
    return _resolve_schema_properties('Tag')

+def strip_private_keys(data):
+    """
+    Remove `__`-prefixed keys from a watch/tag dict at the API boundary.
+
+    These are transient in-memory fields (e.g. `__check_status` set by the worker to
+    surface "Fetching page..." in the UI) and are not part of the public OpenAPI
+    contract. They must never appear in GET responses (otherwise a client that
+    round-trips GET → PUT trips the unknown-field validator), and must be silently
+    discarded from incoming PUT/POST payloads.
+
+    Returns a new dict; the input is not mutated.
+    """
+    if not isinstance(data, dict):
+        return data
+    return {k: v for k, v in data.items() if not (isinstance(k, str) and k.startswith('__'))}
+
+
+def strip_internal_api_fields(data):
+    """
+    Strip both `__`-prefixed keys AND system-managed fields that aren't in the public
+    OpenAPI spec (skip-cache hashes, LLM runtime state, processor-set status, etc.).
+
+    Use this at every public API boundary so GET responses and PUT/POST payloads agree
+    on what's part of the contract. The set of system-managed fields lives in
+    model/schema_utils.py:SYSTEM_MANAGED_NON_SPEC_FIELDS — extend it there, not here.
+
+    Returns a new dict; the input is not mutated.
+    """
+    if not isinstance(data, dict):
+        return data
+    from changedetectionio.model.schema_utils import SYSTEM_MANAGED_NON_SPEC_FIELDS
+    return {
+        k: v for k, v in data.items()
+        if not (isinstance(k, str) and (k.startswith('__') or k in SYSTEM_MANAGED_NON_SPEC_FIELDS))
+    }
+
+
 def validate_openapi_request(operation_id):
    """Decorator to validate incoming requests against OpenAPI spec."""
    def decorator(f):
@@ -30,6 +30,10 @@
      <div class="stab-overview-text">
        <strong>{{ _('Intent filtering') }}</strong>
        <p>{{ _('Each watch or tag can carry a plain-text intent — %(ex1)s or %(ex2)s. On every detected change the AI evaluates the diff against it and suppresses irrelevant noise.', ex1='<strong>"notify me only when the price drops"</strong>', ex2='<strong>"alert when the item goes out of stock"</strong>') | safe }}</p>
+        <p><small>{{ _('Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very small models (≤3B) may misjudge numeric comparisons.',
+              local='<code>qwen2.5:7b</code>',
+              gpt='<code>gpt-4o-mini</code>',
+              gemini='<code>gemini-2.0-flash</code>') | safe }}</small></p>
      </div>
    </div>
    <div class="stab-overview-feature">
@@ -198,10 +198,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        best_from = watch.get_from_version_based_on_last_viewed
        from_version      = request.args.get('from_version', best_from if best_from else dates[-2])
        to_version        = request.args.get('to_version', dates[-1])
-        all_changes       = request.args.get('all_changes', '0') == '1'
-        ignore_whitespace = request.args.get('ignore_whitespace', '0') == '1'
-        show_removed      = request.args.get('removed', '1') == '1'
-        show_added        = request.args.get('added', '1') == '1'
+        from changedetectionio.llm.evaluator import DiffPrefs
+        prefs             = DiffPrefs.from_request_args(request.args)
+        all_changes       = prefs.all_changes
+        ignore_whitespace = prefs.ignore_whitespace
+        show_removed      = prefs.show_removed
+        show_added        = prefs.show_added

        def _prep(text):
            """Optionally normalise whitespace on each line before diffing."""
@@ -263,21 +265,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            return jsonify({'summary': None, 'error': 'No differences found'})

        from changedetectionio.llm.evaluator import (
-            summarise_change, get_effective_summary_prompt,
+            summarise_change, get_effective_summary_prompt, build_summary_cache_prompt,
            is_global_token_budget_exceeded, get_global_token_budget_month,
            LLMInputTooLargeError,
        )

-        effective_prompt = get_effective_summary_prompt(watch, datastore)
-        from changedetectionio.llm.prompt_builder import build_change_summary_system_prompt
-        # Diff-pref flags + system prompt are part of the cache key so prompt changes bust the cache
+        # Diff-pref flags + system prompt are part of the cache key so prompt changes bust the cache.
        _max_summary_tokens = datastore.data['settings']['application'].get('llm_max_summary_tokens', 3000)
-        cache_prompt = (
-            effective_prompt
-            + f'\x00prefs:all={int(all_changes)},ws={int(ignore_whitespace)}'
-              f',rm={int(show_removed)},add={int(show_added)}'
-            + f'\x00sys:{build_change_summary_system_prompt()}'
-            + f'\x00max_tokens:{_max_summary_tokens}'
+        cache_prompt = build_summary_cache_prompt(
+            effective_prompt=get_effective_summary_prompt(watch, datastore),
+            max_summary_tokens=_max_summary_tokens,
+            prefs=prefs,
        )

        # Check cache — keyed by version pair + prompt hash (invalidates if prompt changes)
@@ -16,6 +16,7 @@ Environment variable overrides (take priority over datastore settings):

 import hashlib
 import os
+from dataclasses import dataclass
 from datetime import datetime, timezone
 from loguru import logger

@@ -86,8 +87,30 @@ LLM_DEFAULT_MAX_SUMMARY_TOKENS = 3000
 # to scale; cloud-LLM users hit this default unmodified, preserving prior cost defaults.
 JSON_RESPONSE_MAX_TOKENS = 400

-# Default prompt used when the user hasn't configured llm_change_summary
-DEFAULT_CHANGE_SUMMARY_PROMPT = "Describe in plain English what changed — list what was added or removed as bullet points, including key details for each item. Be careful of content that merely just moved around, you should mention that it moved but dont report that it was added/removed etc. Be considerate of the style content you are summarising the change of, adjust your report accordingly. Do not quote non-English text verbatim; translate and summarise all content into English. Your entire response must be in English."
+# Default prompt used when the user hasn't configured llm_change_summary.
+# This owns the OUTPUT FORMAT (structure, sections, style, language). The system prompt
+# in prompt_builder.build_change_summary_system_prompt() only covers how to READ the diff.
+# Users can replace this entirely (e.g. "Just tell me the new timestamp.") without
+# fighting hard-coded structure rules from the system prompt.
+DEFAULT_CHANGE_SUMMARY_PROMPT = (
+    "Describe what changed in plain English using these sections, in this fixed order — "
+    "omit a section entirely if there is nothing to report for it:\n"
+    "  Added: ...\n"
+    "  Changed: ...\n"
+    "  Removed: ...\n"
+    "The Removed section MUST always be last. Never place removals before additions or changes.\n\n"
+    "List items as bullet points with key details for each one. Be considerate of the style "
+    "of content you are summarising and adjust your report accordingly.\n"
+    "Do not list standalone timestamps like '3 hours ago', 'Yesterday', '2 minutes ago' as added "
+    "or removed items — they are not meaningful content changes.\n"
+    "For content-heavy pages (news, listings, feeds): quote or paraphrase the specific new "
+    "headlines, items, or entries that were added — do not collapse them into vague phrases "
+    "like 'new articles were added' or 'section was expanded'.\n"
+    "For large blocks of new text (full articles, documents, long paragraphs): briefly summarise "
+    "the substance in 1-2 sentences capturing the key point — do not just repeat the title.\n\n"
+    "Do not quote non-English text verbatim; translate and summarise all content into English. "
+    "Your entire response must be in English."
+)


 def _summary_max_tokens(diff: str, max_cap: int = LLM_DEFAULT_MAX_SUMMARY_TOKENS) -> int:
@@ -416,6 +439,58 @@ def compute_summary_cache_key(diff_text: str, prompt: str) -> str:
    return h.hexdigest()[:16]


+@dataclass(frozen=True)
+class DiffPrefs:
+    """
+    User-facing diff display preferences. Part of the LLM summary cache key so
+    that toggling a preference produces a fresh summary.
+
+    Field defaults are the reference values — the query-arg fallbacks hard-coded in
+    from_request_args() (called by the diff.py route) must mirror them, and the
+    worker pre-cache's bare DiffPrefs() uses them directly.
+    """
+    all_changes:       bool = False
+    ignore_whitespace: bool = False
+    show_removed:      bool = True
+    show_added:        bool = True
+
+    @classmethod
+    def from_request_args(cls, args) -> 'DiffPrefs':
+        """Parse from a Flask request.args (or any .get(key, default)-shaped mapping)."""
+        return cls(
+            all_changes       = args.get('all_changes', '0') == '1',
+            ignore_whitespace = args.get('ignore_whitespace', '0') == '1',
+            show_removed      = args.get('removed', '1') == '1',
+            show_added        = args.get('added', '1') == '1',
+        )
+
+    def cache_key_suffix(self) -> str:
+        return (
+            f'\x00prefs:all={int(self.all_changes)},ws={int(self.ignore_whitespace)}'
+            f',rm={int(self.show_removed)},add={int(self.show_added)}'
+        )
+
+
+def build_summary_cache_prompt(effective_prompt: str, max_summary_tokens: int,
+                                prefs: DiffPrefs = None) -> str:
+    """
+    Compose the full cache-key string passed to save/get_llm_diff_summary.
+
+    When prefs is None it falls back to DiffPrefs(), which must match the UI's
+    query-arg defaults so that a worker-side pre-cache is hit by an unmodified UI
+    request. The same helper must be used for both the worker pre-cache write and
+    the UI diff route read, otherwise the prompt hashes diverge and the cache file isn't found.
+    """
+    if prefs is None:
+        prefs = DiffPrefs()
+    return (
+        effective_prompt
+        + prefs.cache_key_suffix()
+        + f'\x00sys:{build_change_summary_system_prompt()}'
+        + f'\x00max_tokens:{max_summary_tokens}'
+    )
+
+
 def summarise_change(watch, datastore, diff: str, current_snapshot: str = '') -> str:
    """
    Generate a plain-language summary of the change using the watch's
@@ -79,7 +79,13 @@ def build_eval_system_prompt() -> str:
        "Rules:\n"
        "- important=true ONLY when the diff clearly and specifically matches the intent — be strict\n"
        "- Pay close attention to direction: an intent about price drops means removed (-) prices and added (+) lower prices\n"
-        "- Empty, trivial, or cosmetic diffs (timestamps, counters, whitespace, navigation) → important=false\n"
+        "- The user's intent always wins. If the intent explicitly asks about timestamps, numbers, counters, "
+        "thresholds, or any specific value (e.g. 'when the timestamp is greater than 1778599592', "
+        "'when stock count > 5'), evaluate the diff against that intent — do NOT dismiss it as cosmetic.\n"
+        "- Otherwise: empty, trivial, or genuinely cosmetic diffs (heartbeat timestamps, view counters, "
+        "whitespace, navigation tweaks) default to important=false\n"
+        "- For numeric comparisons in the intent, parse the values explicitly and compare them — "
+        "do not eyeball or round\n"
        "- If the same text appears in both removed (-) and added (+) lines the content has likely just "
        "shifted or been reordered. Treat pure reordering as important=false unless the intent "
        "explicitly asks about order or position.\n"
@@ -130,7 +136,14 @@ def build_change_summary_prompt(diff: str, custom_prompt: str,
    """
    Build the user message for an AI Change Summary call.
    The user supplies their own instructions (custom_prompt); this wraps them
-    with the diff and optional page context.
+    with the diff (which carries its own surrounding context via unified_diff's
+    n=3 context lines, marked '~' by _annotate_moved_lines).
+
+    NOTE: current_snapshot is accepted for caller compatibility but intentionally
+    unused. A wholesale page excerpt caused the LLM to report unchanged page
+    content (e.g. old release-note bullets) as "what changed" — hallucinations
+    drawn from the excerpt rather than the diff. The in-diff context lines give
+    the model enough surrounding text to describe each change accurately.
    """
    parts = []
    if url:
@@ -138,42 +151,33 @@ def build_change_summary_prompt(diff: str, custom_prompt: str,
    if title:
        parts.append(f"Page title: {title}")
    parts.append(f"Instructions: {custom_prompt}")
-    if current_snapshot:
-        excerpt = trim_to_relevant(current_snapshot, custom_prompt, max_chars=2_000)
-        if excerpt:
-            parts.append(f"\nCurrent page (excerpt):\n{excerpt}")
    parts.append(f"\nWhat changed (diff):\n{_annotate_moved_lines(diff)}")
    return '\n'.join(parts)


 def build_change_summary_system_prompt() -> str:
+    """
+    Universal, format-agnostic instructions: how to READ a diff and accuracy rules.
+    All output-format choices (prose vs JSON, sections, bullets, language, length)
+    are owned by the user prompt — including the default in
+    DEFAULT_CHANGE_SUMMARY_PROMPT — so that a user replacing the user-prompt
+    (e.g. asking for raw JSON) is not overridden by hard-coded format rules here.
+    """
    return (
-        "You are a meticulous, accurate summariser of website changes for monitoring notifications.\n"
-        "Your goal is to describe exactly what changed — never omit significant details, "
-        "never add information that isn't in the diff, and never speculate.\n\n"
+        "You analyse a unified-diff document showing how a monitored web page changed, "
+        "and produce exactly the output the user asks for.\n\n"
        "Rules for reading the diff:\n"
-        "- Lines starting with + are genuinely new content. List them specifically.\n"
-        "- Lines starting with - are genuinely removed content. List them specifically.\n"
+        "- Lines starting with + are genuinely new content.\n"
+        "- Lines starting with - are genuinely removed content.\n"
        "- Lines starting with ~ have been PRE-IDENTIFIED as moved/reordered or trivial — "
        "the same text exists on both sides of the diff, or the line is a standalone timestamp. "
-        "Do NOT report ~ lines as added or removed. "
-        "If many ~ lines exist, note briefly that some content was reordered.\n"
-        "- Never list standalone timestamps like '3 hours ago', 'Yesterday', '2 minutes ago' "
-        "as added or removed items — they are not meaningful content changes.\n"
-        "For content-heavy pages (news, listings, feeds): quote or paraphrase the specific new "
-        "headlines, items, or entries that were added — do not collapse them into vague phrases "
-        "like 'new articles were added' or 'section was expanded'.\n"
-        "For large blocks of new text (full articles, documents, long paragraphs): briefly summarise "
-        "the substance in 1-2 sentences capturing the key point — do not just repeat the title.\n\n"
-        "Structure your response using these sections, in this fixed order — "
-        "omit a section entirely if there is nothing to report for it:\n"
-        "  Added: ...\n"
-        "  Changed: ...\n"
-        "  Removed: ...\n"
-        "The Removed section MUST always be last. Never place removals before additions or changes.\n\n"
-        "Follow the user's formatting instructions exactly for structure, language, and length.\n"
-        "Respond with ONLY the summary text — no JSON, no markdown code fences, no preamble. "
-        "Just the description."
+        "Do NOT treat ~ lines as added or removed.\n\n"
+        "Accuracy: only report what the +/- lines actually contain. Never invent details, "
+        "never speculate, never add information that isn't in the diff.\n\n"
+        "Follow the user's instructions exactly — including the requested output format "
+        "(plain text, JSON, Markdown, single value, etc.), structure, language, and length. "
+        "Do not add preamble, meta-commentary, or self-introduction. Produce only the output "
+        "the user asked for — nothing before it, nothing after it."
    )


@@ -1024,8 +1024,10 @@ class model(EntityPersistenceMixin, watch_base):
        prompt_hash = self._llm_summary_prompt_hash(prompt)
        fname = os.path.join(self.data_dir, f'change-summary-{from_version}-to-{to_version}-{prompt_hash}.txt')
        if not os.path.isfile(fname):
+            logger.debug(f"LLM cached diff summary '{fname}' NOT found")
            return ''
        with open(fname, 'r', encoding='utf-8') as f:
+            logger.debug(f"LLM cached diff summary '{fname}' FOUND")
            return f.read().strip()

    def save_llm_diff_summary(self, summary: str, from_version, to_version, prompt: str = ''):
@@ -343,28 +343,14 @@ class watch_base(dict):
            return

        # Import from shared schema utilities (no circular dependency)
-        from .schema_utils import get_readonly_watch_fields
-        readonly_fields = get_readonly_watch_fields()
+        from .schema_utils import get_readonly_watch_fields, SYSTEM_MANAGED_NON_SPEC_FIELDS

-        # Additional system-managed fields not in OpenAPI spec (yet)
-        # These are set by processors/workers and should not trigger edited flag
-        additional_system_fields = {
-            'last_check_status',  # Set by processors
-            'last_filter_config_hash',  # Set by text_json_diff processor, internal skip-cache
-            'restock',  # Set by restock processor
-            'last_viewed',  # Set by mark_all_viewed endpoint
-            # LLM runtime fields written back by worker/evaluator
-            '_llm_result',
-            '_llm_intent',
-            '_llm_change_summary',
-            'llm_prefilter',
-            'llm_evaluation_cache',
-            'llm_last_tokens_used',
-            'llm_tokens_used_cumulative',
-        }
-
-        # Only mark as edited if this is a user-writable field
-        if key not in readonly_fields and key not in additional_system_fields:
+        # `last_viewed` is set internally by mark_all_viewed and shouldn't flag the watch as
+        # edited, but is not in SYSTEM_MANAGED_NON_SPEC_FIELDS because it IS user-writable via
+        # the UpdateWatch schema (the API path).
+        if (key not in get_readonly_watch_fields()
+                and key != 'last_viewed'
+                and key not in SYSTEM_MANAGED_NON_SPEC_FIELDS):
            self.__watch_was_edited = True

    def __setitem__(self, key, value):
@@ -8,6 +8,35 @@ Shared by both the model layer and API layer to avoid circular dependencies.
 import functools


+# Watch fields written by workers/processors that are NOT part of the public OpenAPI spec.
+#
+# These fields exist on a watch dict at runtime but are internal implementation details
+# (skip-cache hashes, last-check status strings, LLM runtime state, etc.). Used by:
+#   - model/__init__.py: don't trigger the "edited" flag when these are written internally
+#   - api/Watch.py: strip from GET responses and silently discard from PUT/POST inputs
+#                   so that a GET → PUT round trip doesn't trip the unknown-field validator
+#
+# `last_viewed` is intentionally NOT included: it's set internally by mark_all_viewed BUT
+# is also explicitly writable via the UpdateWatch schema (see api/Watch.py valid_fields).
+SYSTEM_MANAGED_NON_SPEC_FIELDS = frozenset({
+    'last_check_status',           # Set by processors
+    'last_filter_config_hash',     # text_json_diff internal skip-cache
+    'restock',                     # Set by restock processor
+    '_llm_result',                 # LLM runtime — populated by evaluator
+    '_llm_intent',
+    '_llm_change_summary',
+    'llm_prefilter',
+    'llm_evaluation_cache',
+    'llm_last_tokens_used',
+    'llm_tokens_used_cumulative',
+})
+
+
+def get_system_managed_non_spec_fields():
+    """Return the set of internal fields not in the public OpenAPI spec."""
+    return SYSTEM_MANAGED_NON_SPEC_FIELDS
+
+
@functools.cache
 def get_openapi_schema_dict():
    """
@@ -210,10 +210,19 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
    llm_summary_prompt = ''
    if llm_configured:
        try:
-            from changedetectionio.llm.evaluator import get_effective_summary_prompt
+            from changedetectionio.llm.evaluator import (
+                get_effective_summary_prompt, build_summary_cache_prompt,
+            )
            _prompt = get_effective_summary_prompt(watch, datastore)
            llm_summary_prompt = _prompt
-            llm_diff_summary = watch.get_llm_diff_summary(from_version, to_version, prompt=_prompt)
+            # Must match the cache_prompt the worker writes and the UI ajax route reads —
+            # using UI default diff prefs so the initial render finds the worker's pre-cache.
+            _max_summary_tokens = datastore.data['settings']['application'].get('llm_max_summary_tokens', 3000)
+            _cache_prompt = build_summary_cache_prompt(
+                effective_prompt=_prompt,
+                max_summary_tokens=_max_summary_tokens,
+            )
+            llm_diff_summary = watch.get_llm_diff_summary(from_version, to_version, prompt=_cache_prompt)
        except Exception as e:
            logger.warning(f"Could not load llm-diff-summary for {uuid}: {e}")

@@ -406,6 +406,106 @@ def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path
        "extract_lines_containing should be persisted and returned via API"


+def test_api_strips_internal_fields(client, live_server, measure_memory_usage, datastore_path):
+    """
+    Internal/transient fields must never cross the API boundary in either direction:
+      1. `__`-prefixed keys (e.g. `__check_status` set by the worker for UI status)
+      2. System-managed fields not in the OpenAPI spec (see SYSTEM_MANAGED_NON_SPEC_FIELDS):
+         `last_check_status`, `last_filter_config_hash`, `_llm_*`, `llm_prefilter`, `llm_evaluation_cache`, etc.
+
+    GET responses must strip them. PUT/POST payloads must silently discard them.
+    Without this, a client that round-trips GET → PUT trips the unknown-field validator.
+    """
+    from changedetectionio.model.schema_utils import SYSTEM_MANAGED_NON_SPEC_FIELDS
+
+    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
+    datastore = live_server.app.config['DATASTORE']
+
+    set_original_response(datastore_path=datastore_path)
+    test_url = url_for('test_endpoint', _external=True)
+
+    # Create
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({"url": test_url}),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+        follow_redirects=True
+    )
+    assert res.status_code == 201
+    watch_uuid = res.json.get('uuid')
+
+    wait_for_all_checks(client)
+
+    # Force both a transient __-prefixed and a system-managed field onto the watch,
+    # simulating worker/processor-set state.
+    watch_obj = datastore.data['watching'][watch_uuid]
+    watch_obj['__check_status'] = 'Fetching page..'
+    watch_obj['last_check_status'] = 200
+    watch_obj['_llm_result'] = {'summary': 'cached llm output'}
+    watch_obj['last_filter_config_hash'] = 'abc123'
+
+    # --- GET must strip all internal fields ---
+    res = client.get(
+        url_for("watch", uuid=watch_uuid),
+        headers={'x-api-key': api_key},
+    )
+    assert res.status_code == 200
+    assert not any(k.startswith('__') for k in res.json.keys()), \
+        f"No __-prefixed field should leak into API responses; got keys: {list(res.json.keys())}"
+    leaked_system_fields = SYSTEM_MANAGED_NON_SPEC_FIELDS & set(res.json.keys())
+    assert not leaked_system_fields, \
+        f"System-managed non-spec fields must not appear in GET response; leaked: {leaked_system_fields}"
+
+    # --- PUT must accept (and silently drop) those same internal fields ---
+    # This is the key round-trip property: a client should be able to PUT back what it just GET'd.
+    # Use the actual GET response as the payload (the realistic round-trip case).
+    payload = dict(res.json)
+    payload['__check_status'] = 'attacker-supplied value'   # not in the GET, but a client could add it
+    payload['last_check_status'] = 999                       # ditto
+    payload['_llm_result'] = 'attacker overwrite'
+    res = client.put(
+        url_for("watch", uuid=watch_uuid),
+        headers={'x-api-key': api_key, 'content-type': 'application/json'},
+        data=json.dumps(payload),
+    )
+    assert res.status_code == 200, \
+        f"PUT round-tripping GET response plus internal fields should succeed (got {res.status_code}: {res.data!r})"
+
+    # Internal fields must not have been overwritten by the PUT
+    assert watch_obj.get('__check_status') == 'Fetching page..', \
+        "PUT must not overwrite __-prefixed fields"
+    assert watch_obj.get('_llm_result') == {'summary': 'cached llm output'}, \
+        "PUT must not overwrite system-managed non-spec fields"
+
+    # --- POST must also silently discard internal fields ---
+    # Use unique sentinel values so we can distinguish "POST persisted my value" from
+    # "the worker concurrently re-set the field while processing the new watch".
+    attacker_check_status = 'attacker-sentinel-__check_status-9f7c'
+    attacker_llm_result = 'attacker-sentinel-_llm_result-9f7c'
+    res = client.post(
+        url_for("createwatch"),
+        data=json.dumps({
+            "url": test_url + "?2",
+            "__check_status": attacker_check_status,
+            "_llm_result": attacker_llm_result,
+        }),
+        headers={'content-type': 'application/json', 'x-api-key': api_key},
+        follow_redirects=True,
+    )
+    assert res.status_code == 201, \
+        f"POST with internal fields should succeed (got {res.status_code}: {res.data!r})"
+    new_uuid = res.json.get('uuid')
+    new_watch = datastore.data['watching'][new_uuid]
+    # If POST had persisted the attacker payload, these specific sentinel values would remain.
+    # The worker may legitimately re-set __check_status with its own status string; that's fine.
+    assert new_watch.get('__check_status') != attacker_check_status, \
+        "POST must not persist __-prefixed fields from input"
+    assert new_watch.get('_llm_result') != attacker_llm_result, \
+        "POST must not persist system-managed fields from input"
+
+    delete_all_watches(client)
+
+
 def test_access_denied(client, live_server, measure_memory_usage, datastore_path):
    # `config_api_token_enabled` Should be On by default
    res = client.get(
@@ -799,6 +799,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -815,6 +815,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -797,6 +797,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -797,6 +797,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -835,6 +835,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -803,6 +803,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -799,6 +799,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -804,6 +804,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -799,6 +799,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr "각 모니터링 또는 태그에 일반 텍스트 판단 기준(%(ex1)s 또는 %(ex2)s)을 지정할 수 있습니다. 변경이 감지될 때마다 AI가 diff를 이 기준과 비교해 불필요한 알림을 줄입니다."

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -8,7 +8,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: changedetection.io 0.55.3\n"
 "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
-"POT-Creation-Date: 2026-05-12 11:08+0200\n"
+"POT-Creation-Date: 2026-05-12 17:39+0200\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language-Team: LANGUAGE <LL@li.org>\n"
@@ -796,6 +796,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -822,6 +822,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -832,6 +832,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -812,6 +812,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -801,6 +801,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -800,6 +800,13 @@ msgid ""
 "diff against it and suppresses irrelevant noise."
 msgstr ""

+#: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+#, python-format
+msgid ""
+"Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very "
+"small models (≤3B) may misjudge numeric comparisons."
+msgstr ""
+
 #: changedetectionio/blueprint/settings/templates/settings_llm_tab.html
 #, python-format
 msgid ""
@@ -502,22 +502,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec

                        datastore.update_watch(uuid=uuid, update_obj=update_obj)

-                        # Save AI summary file now that the new snapshot has been committed
-                        # and its version timestamp is the last key in history
-                        if update_obj.get('_llm_change_summary') and _llm_from_version:
-                            try:
-                                from changedetectionio.llm.evaluator import get_effective_summary_prompt
-                                _llm_to_version = list(watch.history.keys())[-1]
-                                _llm_prompt = get_effective_summary_prompt(watch, datastore)
-                                watch.save_llm_diff_summary(
-                                    update_obj['_llm_change_summary'],
-                                    _llm_from_version,
-                                    _llm_to_version,
-                                    prompt=_llm_prompt,
-                                )
-                            except Exception as _fe:
-                                logger.warning(f"Could not write change-summary file for {uuid}: {_fe}")
-
                        if changed_detected or not watch.history_n:
                            if update_handler.screenshot:
                                watch.save_screenshot(screenshot=update_handler.screenshot)
@@ -543,6 +527,31 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                                                    timestamp=int(fetch_start_time),
                                                    snapshot_id=update_obj.get('previous_md5', 'none'))

+                            # Save AI summary file now that the new snapshot is committed —
+                            # the last key of watch.history now reflects the just-saved version,
+                            # so the cache filename matches what the UI will later look up.
+                            # Cache key must use build_summary_cache_prompt() with UI defaults so
+                            # the worker write and the UI read hash to the same prompt_hash.
+                            if update_obj.get('_llm_change_summary') and _llm_from_version:
+                                try:
+                                    from changedetectionio.llm.evaluator import (
+                                        get_effective_summary_prompt, build_summary_cache_prompt,
+                                    )
+                                    _llm_to_version = list(watch.history.keys())[-1]
+                                    _llm_max_summary_tokens = datastore.data['settings']['application'].get('llm_max_summary_tokens', 3000)
+                                    _llm_cache_prompt = build_summary_cache_prompt(
+                                        effective_prompt=get_effective_summary_prompt(watch, datastore),
+                                        max_summary_tokens=_llm_max_summary_tokens,
+                                    )
+                                    watch.save_llm_diff_summary(
+                                        update_obj['_llm_change_summary'],
+                                        _llm_from_version,
+                                        _llm_to_version,
+                                        prompt=_llm_cache_prompt,
+                                    )
+                                except Exception as _fe:
+                                    logger.warning(f"Could not write change-summary file for {uuid}: {_fe}")
+
                            empty_pages_are_a_change = datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
                            if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
                                watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))
Author	SHA1	Message	Date
dgtlmoon	11c728ee4a	Better support for watch API private vars	2026-05-15 12:21:18 +02:00
dgtlmoon	bf5efc4c31	LLM - Fixing summary cache miss-hit (#4136 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / lint-translations (push) Has been cancelled Details ChangeDetection.io App Test / lint-template-i18n (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled Details	2026-05-12 17:44:36 +02:00
dgtlmoon	aa53574332	LLM - UI - Message that 'AI Intent' (triggers) need a bigger model	2026-05-12 17:40:54 +02:00
dgtlmoon	8afa06aa96	LLM - Allow better override of formats and rules for intent/triggers	2026-05-12 17:36:39 +02:00
dgtlmoon	c6e0205622	LLM - Remove the 'format' info from the system prompt so you can create your own 'summary' formats (ie: "Make a new JSON object with the timestamp")	2026-05-12 17:28:43 +02:00
dgtlmoon	fbe59f89b6	UI - Make LLM status sticky (#4135 )	2026-05-12 15:28:33 +02:00
dgtlmoon	15be5a62db	LLM - Bumping default prompt Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / lint-translations (push) Has been cancelled Details ChangeDetection.io App Test / lint-template-i18n (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled Details	2026-05-12 13:18:58 +02:00