mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-05-01 07:10:34 +00:00
386 lines
14 KiB
Python
386 lines
14 KiB
Python
"""
|
|
LLM evaluation orchestration.
|
|
|
|
Two public entry points:
|
|
- run_setup(watch, datastore, snapshot_text) — one-time: decide if pre-filter needed
|
|
- evaluate_change(watch, datastore, diff, current_snapshot) — per-change evaluation
|
|
|
|
Intent resolution: watch.llm_intent → first tag with llm_intent → None (no evaluation)
|
|
Cache: each (intent, diff) pair is evaluated exactly once, result stored in watch.
|
|
|
|
Environment variable overrides (take priority over datastore settings):
|
|
LLM_MODEL — model string (e.g. "gpt-4o-mini", "ollama/llama3.2")
|
|
LLM_API_KEY — API key for cloud providers
|
|
LLM_API_BASE — base URL for local/custom endpoints (e.g. http://localhost:11434)
|
|
"""
|
|
|
|
import hashlib
|
|
import os
|
|
from loguru import logger
|
|
|
|
from . import client as llm_client
|
|
from .prompt_builder import (
|
|
build_change_summary_prompt, build_change_summary_system_prompt,
|
|
build_eval_prompt, build_eval_system_prompt,
|
|
build_preview_prompt, build_preview_system_prompt,
|
|
build_setup_prompt, build_setup_system_prompt,
|
|
)
|
|
from .response_parser import parse_eval_response, parse_preview_response, parse_setup_response
|
|
|
|
# Token cap for change-summary completions; summaries can run longer than
# eval responses, so they get their own limit.
_MAX_SUMMARY_TOKENS = 500

# Fallback instruction used when no llm_change_summary prompt is configured.
DEFAULT_CHANGE_SUMMARY_PROMPT = (
    "Briefly describe in plain English what changed — "
    "what was added, removed, or modified."
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Intent resolution
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def resolve_llm_field(watch, datastore, field: str) -> tuple[str, str]:
    """
    Resolve an LLM per-watch field, cascading from the watch to its tags.

    The watch's own value wins. Otherwise the first tag, in the order the
    watch lists them, that holds a non-blank value for ``field`` supplies it.

    Args:
        watch: mapping-like watch object (read via ``.get``).
        datastore: object exposing ``data['settings']['application']``.
        field: key looked up on the watch and then on each of its tags.

    Returns:
        ``(value, source)`` where ``source`` is ``'watch'`` or the tag's title
        (``'tag'`` when the tag has no ``title`` key).
        ``('', '')`` if the field is not set anywhere.
    """
    value = (watch.get(field) or '').strip()
    if value:
        return value, 'watch'

    # `or ()` rather than a .get() default: the key may exist but hold None.
    tag_uuids = watch.get('tags') or ()
    if not tag_uuids:
        return '', ''

    # Only touch the datastore when there is at least one tag to look up.
    known_tags = datastore.data['settings']['application'].get('tags') or {}
    for tag_uuid in tag_uuids:
        tag = known_tags.get(tag_uuid)
        if not tag:
            # Stale reference to a tag that no longer exists.
            continue
        tag_value = (tag.get(field) or '').strip()
        if tag_value:
            return tag_value, tag.get('title', 'tag')

    return '', ''
|
|
|
|
|
|
def resolve_intent(watch, datastore) -> tuple[str, str]:
    """
    Return (intent, source) where source is 'watch' or tag title.

    Returns ('', '') if no intent is configured anywhere.

    Thin wrapper over resolve_llm_field() for the 'llm_intent' field, so the
    watch → tag cascade lives in exactly one place.
    """
    return resolve_llm_field(watch, datastore, 'llm_intent')
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# LLM config helper
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def get_llm_config(datastore) -> dict | None:
|
|
"""
|
|
Return LLM config dict or None if not configured.
|
|
|
|
Resolution order (first non-empty model wins):
|
|
1. Environment variables: LLM_MODEL, LLM_API_KEY, LLM_API_BASE
|
|
2. Datastore settings (set via UI)
|
|
"""
|
|
# 1. Environment variable override
|
|
env_model = os.getenv('LLM_MODEL', '').strip()
|
|
if env_model:
|
|
return {
|
|
'model': env_model,
|
|
'api_key': os.getenv('LLM_API_KEY', '').strip(),
|
|
'api_base': os.getenv('LLM_API_BASE', '').strip(),
|
|
}
|
|
|
|
# 2. Datastore settings
|
|
cfg = datastore.data['settings']['application'].get('llm') or {}
|
|
if not cfg.get('model'):
|
|
return None
|
|
return cfg
|
|
|
|
|
|
def llm_configured_via_env() -> bool:
    """Report whether LLM_MODEL is set to a non-blank value in the environment."""
    model_from_env = os.getenv('LLM_MODEL', '')
    return len(model_from_env.strip()) > 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# One-time setup: derive pre-filter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _check_token_budget(watch, cfg, tokens_this_call: int = 0) -> bool:
    """
    Record token usage on the watch and test it against the configured limits.

    A positive ``tokens_this_call`` is first added to
    ``watch['llm_tokens_used_cumulative']``. The call is then checked against
    ``cfg['max_tokens_per_check']`` and the running total against
    ``cfg['max_tokens_cumulative']``; a missing or zero limit disables that check.

    Returns:
        True while within budget, False (after logging a warning) once either
        limit is exceeded.
    """
    # Book the usage first so the cumulative check below includes this call.
    if tokens_this_call > 0:
        already_used = watch.get('llm_tokens_used_cumulative') or 0
        watch['llm_tokens_used_cumulative'] = already_used + tokens_this_call

    per_check_limit = int(cfg.get('max_tokens_per_check') or 0)
    cumulative_limit = int(cfg.get('max_tokens_cumulative') or 0)

    if per_check_limit and tokens_this_call > per_check_limit:
        logger.warning(
            f"LLM token budget exceeded for {watch.get('uuid')}: "
            f"{tokens_this_call} tokens > per-check limit {per_check_limit}"
        )
        return False

    if not cumulative_limit:
        return True

    running_total = watch.get('llm_tokens_used_cumulative') or 0
    if running_total > cumulative_limit:
        logger.warning(
            f"LLM cumulative token budget exceeded for {watch.get('uuid')}: "
            f"{running_total} tokens > limit {cumulative_limit}"
        )
        return False

    return True
|
|
|
|
|
|
def run_setup(watch, datastore, snapshot_text: str) -> None:
    """
    Ask the LLM whether a CSS pre-filter would sharpen matching for the intent.

    The answer is written to watch['llm_prefilter'] (selector string or None).
    Does nothing when no LLM is configured or no intent resolves for the watch.
    Any failure during the call or while parsing is logged and leaves
    watch['llm_prefilter'] set to None.

    Intended to run once when the intent is first set, and again if the
    pre-filter returns zero matches.
    """
    llm_cfg = get_llm_config(datastore)
    if not llm_cfg:
        return

    intent = resolve_intent(watch, datastore)[0]
    if not intent:
        return

    url = watch.get('url', '')
    messages = [
        {'role': 'system', 'content': build_setup_system_prompt()},
        {'role': 'user', 'content': build_setup_prompt(intent, snapshot_text, url=url)},
    ]

    try:
        raw, tokens = llm_client.completion(
            model=llm_cfg['model'],
            messages=messages,
            api_key=llm_cfg.get('api_key'),
            api_base=llm_cfg.get('api_base'),
        )
        # Records usage; the verdict is not acted on for the setup call.
        _check_token_budget(watch, llm_cfg, tokens)
        parsed = parse_setup_response(raw)
        selector = parsed['selector']
        watch['llm_prefilter'] = selector
        logger.debug(f"LLM setup for {watch.get('uuid')}: prefilter={selector} reason={parsed['reason']}")
    except Exception as e:
        logger.warning(f"LLM setup call failed for {watch.get('uuid')}: {e}")
        watch['llm_prefilter'] = None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AI Change Summary — human-readable description of what changed
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def get_effective_summary_prompt(watch, datastore) -> str:
    """Return the change-summary prompt in effect: the watch/tag value if set, else the default."""
    configured = resolve_llm_field(watch, datastore, 'llm_change_summary')[0]
    if configured:
        return configured
    return DEFAULT_CHANGE_SUMMARY_PROMPT
|
|
|
|
|
|
def compute_summary_cache_key(diff_text: str, prompt: str) -> str:
    """
    Stable 16-char hex key for a (diff, prompt) pair. Stored alongside the summary file.

    The key is the first 16 hex digits of the MD5 of ``diff NUL prompt``. The
    NUL separator keeps ('ab', '') and ('a', 'b') from hashing identically.
    MD5 is used purely as a fingerprint, so it is requested with
    ``usedforsecurity=False``; without the flag, FIPS-restricted Python builds
    refuse to construct the hash. Characters that cannot be encoded (e.g. lone
    surrogates) are replaced rather than raising.
    """
    payload = b'\x00'.join(
        part.encode('utf-8', errors='replace') for part in (diff_text, prompt)
    )
    return hashlib.md5(payload, usedforsecurity=False).hexdigest()[:16]
|
|
|
|
|
|
def summarise_change(watch, datastore, diff: str, current_snapshot: str = '') -> str:
    """
    Generate a plain-language summary of the change using the watch's
    llm_change_summary prompt (cascades from tag if not set on watch, then
    falls back to DEFAULT_CHANGE_SUMMARY_PROMPT).

    Returns the summary string, or '' when no LLM is configured, the diff is
    empty/blank/None, or the LLM call fails.
    The result replaces {{ diff }} in notifications so the user gets a
    readable description instead of raw +/- diff lines.

    Side effects on success: token usage is added to the watch's cumulative
    counter and to watch['llm_last_tokens_used'].
    """
    cfg = get_llm_config(datastore)
    if not cfg:
        return ''

    # Nothing to summarise. Checked before any prompt resolution, and tolerant
    # of None the same way evaluate_change() is.
    if not diff or not diff.strip():
        return ''

    custom_prompt = resolve_llm_field(watch, datastore, 'llm_change_summary')[0]
    if not custom_prompt:
        custom_prompt = DEFAULT_CHANGE_SUMMARY_PROMPT

    url = watch.get('url', '')
    title = watch.get('page_title') or watch.get('title') or ''

    system_prompt = build_change_summary_system_prompt()
    user_prompt = build_change_summary_prompt(
        diff=diff,
        custom_prompt=custom_prompt,
        current_snapshot=current_snapshot,
        url=url,
        title=title,
    )

    try:
        raw, tokens = llm_client.completion(
            model=cfg['model'],
            messages=[
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': user_prompt},
            ],
            api_key=cfg.get('api_key'),
            api_base=cfg.get('api_base'),
            max_tokens=_MAX_SUMMARY_TOKENS,
        )
        summary = raw.strip()
        # Records usage; the verdict is not acted on since the summary is
        # already generated.
        _check_token_budget(watch, cfg, tokens)
        # Added to (not replacing) the stored value.
        watch['llm_last_tokens_used'] = (watch.get('llm_last_tokens_used') or 0) + tokens
        logger.debug(
            f"LLM change summary {watch.get('uuid')}: tokens={tokens} "
            f"summary={summary[:80]}"
        )
        return summary
    except Exception as e:
        # Best effort: a failed summary must not break the notification path.
        logger.warning(f"LLM change summary failed for {watch.get('uuid')}: {e}")
        return ''
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Live-preview extraction (current content, no diff)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def preview_extract(watch, datastore, content: str) -> dict | None:
    """
    For the live-preview endpoint: extract relevant information from the
    *current* page content according to the watch's intent.

    Unlike evaluate_change (which compares a diff), this asks the LLM to
    directly answer the intent against the current snapshot — giving the user
    immediate feedback like "30 articles listed" or "Price: $149, 25% off".

    Returns the parsed preview response ({'found': bool, 'answer': str}), or
    None if the LLM is not configured, no intent resolves, the content is
    empty/blank/None, or the call fails.
    """
    cfg = get_llm_config(datastore)
    if not cfg:
        return None

    intent, _ = resolve_intent(watch, datastore)
    # `not content` first so a None snapshot is treated as "nothing to preview"
    # instead of raising on .strip().
    if not intent or not content or not content.strip():
        return None

    url = watch.get('url', '')
    title = watch.get('page_title') or watch.get('title') or ''

    system_prompt = build_preview_system_prompt()
    user_prompt = build_preview_prompt(intent, content, url=url, title=title)

    try:
        raw, tokens = llm_client.completion(
            model=cfg['model'],
            messages=[
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': user_prompt},
            ],
            api_key=cfg.get('api_key'),
            api_base=cfg.get('api_base'),
        )
        result = parse_preview_response(raw)
        logger.debug(
            f"LLM preview {watch.get('uuid')}: found={result['found']} "
            f"tokens={tokens} answer={result['answer'][:80]}"
        )
        return result
    except Exception as e:
        logger.warning(f"LLM preview extraction failed for {watch.get('uuid')}: {e}")
        return None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Per-change evaluation
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def evaluate_change(watch, datastore, diff: str, current_snapshot: str = '') -> dict | None:
    """
    Evaluate whether `diff` matches the watch's intent.

    Returns {'important': bool, 'summary': str} or None if LLM not configured / no intent.
    An empty or blank diff short-circuits to {'important': False, 'summary': ''}.

    Results are cached in watch['llm_evaluation_cache'] by a hash of
    (intent, diff) — each unique pair is sent to the LLM at most once.

    Fail-open policy: when the token budget is already exhausted or the LLM
    call / response parsing fails, the change is reported as important with an
    empty summary so that a notification is never suppressed by an LLM problem.
    """
    cfg = get_llm_config(datastore)
    if not cfg:
        return None

    intent, source = resolve_intent(watch, datastore)
    if not intent:
        return None

    if not diff or not diff.strip():
        return {'important': False, 'summary': ''}

    # Cache lookup — evaluations are deterministic once cached.
    # errors='replace' keeps un-encodable text (e.g. lone surrogates) from
    # raising here, outside the fail-open handler below; keys for ordinary
    # text are unchanged.
    # NOTE(review): the '||' separator is ambiguous if the intent itself
    # contains '||'; left as-is so existing cache entries stay valid.
    cache_key = hashlib.sha256(
        f"{intent}||{diff}".encode('utf-8', errors='replace')
    ).hexdigest()
    cache = watch.get('llm_evaluation_cache') or {}
    if cache_key in cache:
        logger.debug(f"LLM cache hit for {watch.get('uuid')} key={cache_key[:8]}")
        return cache[cache_key]

    # Check cumulative budget before making the call
    if not _check_token_budget(watch, cfg):
        # Already over budget — fail open (don't suppress notification)
        return {'important': True, 'summary': ''}

    url = watch.get('url', '')
    title = watch.get('page_title') or watch.get('title') or ''

    system_prompt = build_eval_system_prompt()
    user_prompt = build_eval_prompt(
        intent=intent,
        diff=diff,
        current_snapshot=current_snapshot,
        url=url,
        title=title,
    )

    try:
        raw, tokens = llm_client.completion(
            model=cfg['model'],
            messages=[
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': user_prompt},
            ],
            api_key=cfg.get('api_key'),
            api_base=cfg.get('api_base'),
        )
        result = parse_eval_response(raw)
    except Exception as e:
        logger.warning(f"LLM evaluation failed for {watch.get('uuid')}: {e}")
        # On failure: don't suppress the notification — pass through as important
        watch['llm_last_tokens_used'] = 0
        return {'important': True, 'summary': ''}

    # Accumulate token usage and log if a limit was crossed. The verdict is
    # not acted on here; the pre-call check above gates later evaluations.
    _check_token_budget(watch, cfg, tokens)
    watch['llm_last_tokens_used'] = tokens

    # Store in cache (create the dict if the key is absent or None)
    if watch.get('llm_evaluation_cache') is None:
        watch['llm_evaluation_cache'] = {}
    watch['llm_evaluation_cache'][cache_key] = result

    logger.debug(
        f"LLM eval {watch.get('uuid')} (intent from {source}): "
        f"important={result['important']} tokens={tokens} summary={result['summary'][:80]}"
    )
    return result
|