From 956664cd4cb2839df2c8b4f04c5e4486ed64451a Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 23 Apr 2026 03:13:56 +0200 Subject: [PATCH] token len WIP --- changedetectionio/llm/client.py | 14 +++++++++++--- changedetectionio/llm/evaluator.py | 12 +++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/changedetectionio/llm/client.py b/changedetectionio/llm/client.py index a184e4c2..9cb4f6e5 100644 --- a/changedetectionio/llm/client.py +++ b/changedetectionio/llm/client.py @@ -6,9 +6,11 @@ and makes the call easy to mock in tests. from loguru import logger -# Output token cap for all LLM calls — our JSON response is always <50 tokens, -# so 200 is a generous hard cap that prevents runaway per-call cost. -_MAX_COMPLETION_TOKENS = 200 +# Default output token cap for JSON-returning calls (intent eval, preview, setup). +# These return small JSON objects — 400 is enough for a verbose explanation while +# still preventing runaway cost. Change summaries pass their own max_tokens via +# _summary_max_tokens() and are NOT subject to this cap. 
+_MAX_COMPLETION_TOKENS = 400 def completion(model: str, messages: list, api_key: str = None, @@ -51,6 +53,12 @@ def completion(model: str, messages: list, api_key: str = None, text = ''.join(getattr(p, 'text', '') or '' for p in parts).strip() logger.debug(f"LLM client: extracted text from message.parts ({len(parts)} parts) model={model!r}") + if finish == 'length': + logger.warning( + f"LLM client: response truncated (finish_reason='length') model={model!r} " + f"— increase max_tokens; got {len(text)} chars so far" + ) + if not text: logger.warning( f"LLM client: empty content from model={model!r} " diff --git a/changedetectionio/llm/evaluator.py b/changedetectionio/llm/evaluator.py index 8f5f6f28..bbc8913f 100644 --- a/changedetectionio/llm/evaluator.py +++ b/changedetectionio/llm/evaluator.py @@ -68,13 +68,15 @@ def _cached_system(text: str, model: str = '') -> dict: def _summary_max_tokens(diff: str) -> int: - """Scale completion tokens to diff size so short diffs aren't over-allocated - and long diffs (or all_changes multi-snapshot) aren't truncated mid-sentence. + """Scale completion tokens to diff size. - ~1 LLM token ≈ 4 chars of English text; output is roughly proportional to input. - Bounds: 400 (minimum for any meaningful summary) … 3 000 (cost sanity ceiling). + Short diffs (<3 000 chars) get the full 3 000-token ceiling — they're cheap + and we never want truncation on small changes. + Longer diffs scale at ~1 output token per 2 input chars, floored at 3 000 and capped at 6 000. """ - return min(max(400, len(diff) // 4), 3000) + if len(diff) < 3000: + return 3000 + return max(3000, min(len(diff) // 2, 6000)) # Default prompt used when the user hasn't configured llm_change_summary DEFAULT_CHANGE_SUMMARY_PROMPT = "Describe in plain English what changed — list what was added or removed as bullet points, including key details for each item. Be careful of content that merely just moved around, you should mention that it moved but dont report that it was added/removed etc. 
Be considerate of the style content you are summarising the change of, adjust your report accordingly. Do not quote non-English text verbatim; translate and summarise all content into English. Your entire response must be in English."