Compare commits

...

2 Commits

Author SHA1 Message Date
dgtlmoon 96beb107c3 UI - LLM - Fix for settings (wtforms vs pydantic) 2026-05-25 18:17:28 +02:00
dgtlmoon b7bb67fac4 LLM - Smarter reasoning budget logic for gemini models 2026-05-25 18:03:11 +02:00
3 changed files with 97 additions and 3 deletions
@@ -99,6 +99,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
llm_form_input = dict(form.data.get('llm') or {})
# Empty IntegerField submissions come back as None from WTForms;
# the schema declares those fields as strict `int`, so passing
# them through would fail validation. Treat None like the
# absent-key case: keep the stored value, don't merge.
llm_form_input = {k: v for k, v in llm_form_input.items() if v is not None}
# PasswordField never re-renders, so a blank submitted value means
# "keep stored key" — drop it from the merge.
if not (llm_form_input.get('api_key') or '').strip():
+16 -3
View File
@@ -82,10 +82,23 @@ def _check_input_size(text: str, max_chars: int) -> None:
def _thinking_extra_body(model: str, budget: int) -> dict | None:
"""Return litellm extra_body to control thinking for models that support it.
For Gemini 2.5+: passes thinkingConfig with the given budget (0 = disabled).
For all other models: returns None (no-op).
The `thinkingConfig.thinkingBudget` payload is Gemini-specific (Anthropic and
OpenAI reasoning models use different parameters), so we gate on the gemini/
provider prefix first, then defer to litellm's model registry for the actual
"does this model think?" decision. That picks up new Gemini variants and
rolling aliases (`gemini-flash-latest`, etc.) as litellm's registry tracks
them, without us hardcoding model names here.
"""
if not model.startswith('gemini/gemini-2.5'):
if not model.startswith('gemini/'):
return None
try:
import litellm
if not litellm.get_model_info(model).get('supports_reasoning'):
return None
except Exception:
# Unknown model or registry lookup failed — skip the thinking config
# rather than guess. Worst case: thinking stays at the provider default.
return None
return {'generationConfig': {'thinkingConfig': {'thinkingBudget': budget}}}
@@ -196,6 +196,81 @@ def test_settings_form_preserves_token_counters(
delete_all_watches(client)
def test_settings_form_blank_llm_integer_fields_preserve_stored_values(
        client, live_server, measure_memory_usage, datastore_path):
    """
    Blank LLM IntegerFields must keep their stored values on settings save.

    WTForms hands back None for an IntegerField submitted empty, while
    LLMSettings types token_budget_month / max_input_chars /
    max_tokens_per_count_period / local_token_multiplier as strict `int`.
    Feeding that None into model_validate raises ValidationError and turns
    the settings save into a 500.

    Regression for settings/__init__.py — the LLM merge has to discard None
    values (same as a missing key) so the previously stored integers survive.
    """
    # Literal expectations, kept separate from the datastore's own dict so the
    # final comparison cannot be satisfied by in-place mutation.
    expected_ints = {
        'token_budget_month': 50000,
        'max_input_chars': 200000,
        'max_tokens_per_count_period': 1000,
        'local_token_multiplier': 3,
    }

    ds = client.application.config.get('DATASTORE')
    ds.data['settings']['application']['llm'] = {
        'model': 'gpt-4o',
        'api_key': 'sk-existing',
        **expected_ints,
    }

    # The bug-trigger: every LLM IntegerField goes up as an empty string.
    blank_llm_ints = {f'llm-{field}': '' for field in expected_ints}

    form_payload = {
        'llm-model': 'gpt-4o',
        'llm-api_key': '',
        'llm-api_base': '',
        **blank_llm_ints,
        # Bare minimum for the remainder of the form to validate.
        # 'System default' is removed from the notification_format choices on
        # the global form, so a real code such as 'html' is required.
        'application-pager_size': '50',
        'application-notification_format': 'html',
        'application-fetch_backend': 'html_requests',
        'application-rss_diff_length': '5',
        'application-filter_failure_notification_threshold_attempts': '0',
        'requests-time_between_check-days': '0',
        'requests-time_between_check-hours': '0',
        'requests-time_between_check-minutes': '5',
        'requests-time_between_check-seconds': '0',
        'requests-time_between_check-weeks': '0',
        'requests-jitter_seconds': '0',
        'requests-workers': '10',
        'requests-timeout': '60',
    }

    res = client.post(
        url_for('settings.settings_page'),
        data=form_payload,
        follow_redirects=True,
    )

    assert res.status_code == 200, \
        f"Settings save crashed on blank LLM IntegerField submission (got {res.status_code})"

    # Guard against a vacuous pass: the form has to validate and reach the
    # LLM save path, otherwise the code under test never ran.
    assert b'Settings updated.' in res.data, \
        "Settings form did not validate — the bug-path was never exercised. Check fixture fields."

    body = res.data.decode('utf-8', errors='replace')
    assert 'ValidationError' not in body, \
        "Pydantic ValidationError leaked into the response — blank IntegerField wasn't filtered"

    llm = ds.data['settings']['application'].get('llm') or {}
    for field, wanted in expected_ints.items():
        actual = llm.get(field)
        assert actual == wanted, \
            f"Blank submission must preserve stored {field} (got {actual!r})"

    delete_all_watches(client)
def test_settings_form_cannot_inject_fake_token_counts(
client, live_server, measure_memory_usage, datastore_path):
"""