mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-05-01 15:20:33 +00:00
token len WIP
This commit is contained in:
@@ -6,9 +6,11 @@ and makes the call easy to mock in tests.
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# Output token cap for all LLM calls — our JSON response is always <50 tokens,
|
||||
# so 200 is a generous hard cap that prevents runaway per-call cost.
|
||||
_MAX_COMPLETION_TOKENS = 200
|
||||
# Default output token cap for JSON-returning calls (intent eval, preview, setup).
|
||||
# These return small JSON objects — 400 is enough for a verbose explanation while
|
||||
# still preventing runaway cost. Change summaries pass their own max_tokens via
|
||||
# _summary_max_tokens() and are NOT subject to this cap.
|
||||
_MAX_COMPLETION_TOKENS = 400
|
||||
|
||||
|
||||
def completion(model: str, messages: list, api_key: str = None,
|
||||
@@ -51,6 +53,12 @@ def completion(model: str, messages: list, api_key: str = None,
|
||||
text = ''.join(getattr(p, 'text', '') or '' for p in parts).strip()
|
||||
logger.debug(f"LLM client: extracted text from message.parts ({len(parts)} parts) model={model!r}")
|
||||
|
||||
if finish == 'length':
|
||||
logger.warning(
|
||||
f"LLM client: response truncated (finish_reason='length') model={model!r} "
|
||||
f"— increase max_tokens; got {len(text)} chars so far"
|
||||
)
|
||||
|
||||
if not text:
|
||||
logger.warning(
|
||||
f"LLM client: empty content from model={model!r} "
|
||||
|
||||
@@ -68,13 +68,15 @@ def _cached_system(text: str, model: str = '') -> dict:
|
||||
|
||||
|
||||
def _summary_max_tokens(diff: str) -> int:
|
||||
"""Scale completion tokens to diff size so short diffs aren't over-allocated
|
||||
and long diffs (or all_changes multi-snapshot) aren't truncated mid-sentence.
|
||||
"""Scale completion tokens to diff size.
|
||||
|
||||
~1 LLM token ≈ 4 chars of English text; output is roughly proportional to input.
|
||||
Bounds: 400 (minimum for any meaningful summary) … 3 000 (cost sanity ceiling).
|
||||
Short diffs (<3 000 chars) get the full 3 000-token ceiling — they're cheap
|
||||
and we never want truncation on small changes.
|
||||
Longer diffs scale at ~1 output token per 2 input chars, capped at 3 000.
|
||||
"""
|
||||
return min(max(400, len(diff) // 4), 3000)
|
||||
if len(diff) < 3000:
|
||||
return 3000
|
||||
return min(len(diff) // 2, 3000)
|
||||
|
||||
# Default prompt used when the user hasn't configured llm_change_summary.
# Runtime text — sent verbatim to the model; do not reword casually, as prompt
# phrasing changes summary behavior.
DEFAULT_CHANGE_SUMMARY_PROMPT = "Describe in plain English what changed — list what was added or removed as bullet points, including key details for each item. Be careful of content that merely just moved around, you should mention that it moved but dont report that it was added/removed etc. Be considerate of the style content you are summarising the change of, adjust your report accordingly. Do not quote non-English text verbatim; translate and summarise all content into English. Your entire response must be in English."
|
||||
|
||||
Reference in New Issue
Block a user