Files
changedetection.io/changedetectionio/llm/client.py
T
dgtlmoon 73025494f8 WIP
2026-04-17 07:21:39 +02:00

50 lines
1.8 KiB
Python

"""
Thin wrapper around litellm.completion.
Keeps litellm import isolated so the rest of the codebase doesn't depend on it directly,
and makes the call easy to mock in tests.
"""
from loguru import logger
# Default output-token cap: completion() sends this as `max_tokens` whenever the
# caller does not pass an explicit value. Our JSON response is always <50 tokens,
# so 200 is a generous hard cap that prevents runaway per-call cost.
_MAX_COMPLETION_TOKENS = 200
def completion(model: str, messages: list, api_key: str = None,
               api_base: str = None, timeout: int = 30,
               max_tokens: int = None) -> tuple[str, int, int, int]:
    """
    Call the LLM and return (response_text, total_tokens, input_tokens, output_tokens).

    Args:
        model: Model identifier, forwarded to litellm.completion unchanged.
        messages: Chat messages, forwarded to litellm.completion unchanged.
        api_key: Forwarded as `api_key` only when truthy.
        api_base: Forwarded as `api_base` only when truthy.
        timeout: Forwarded as `timeout`.
        max_tokens: Output token cap; falls back to _MAX_COMPLETION_TOKENS
            when None.

    Returns:
        A tuple (response_text, total_tokens, input_tokens, output_tokens).
        response_text is the content of the first choice's message, returned
        as-is. Token counts are 0 if the provider doesn't return usage data.
        If the provider reports prompt/completion counts but no total, the
        total is computed as input_tokens + output_tokens.

    Raises:
        RuntimeError: If litellm cannot be imported.
        Exception: Any error raised by the call or while reading the response
            is logged as a warning and re-raised — callers handle gracefully.
    """
    # Imported lazily so the rest of the codebase doesn't need litellm at import time.
    try:
        import litellm
    except ImportError as exc:
        raise RuntimeError("litellm is not installed. Add it to requirements.txt.") from exc

    kwargs = {
        'model': model,
        'messages': messages,
        'timeout': timeout,
        'temperature': 0,
        'max_tokens': max_tokens if max_tokens is not None else _MAX_COMPLETION_TOKENS,
    }
    # Optional credentials/endpoint are only sent when actually configured.
    if api_key:
        kwargs['api_key'] = api_key
    if api_base:
        kwargs['api_base'] = api_base

    try:
        response = litellm.completion(**kwargs)
        text = response.choices[0].message.content
        usage = getattr(response, 'usage', None)

        def _count(field: str) -> int:
            # Missing usage, missing field, None and 0 all collapse to 0.
            return int(getattr(usage, field, 0) or 0) if usage else 0

        input_tokens = _count('prompt_tokens')
        output_tokens = _count('completion_tokens')
        # Some providers omit total_tokens while still reporting the parts;
        # derive the total instead of reporting 0.
        total_tokens = _count('total_tokens') or (input_tokens + output_tokens)
        return text, total_tokens, input_tokens, output_tokens
    except Exception as e:
        logger.warning(f"LLM call failed: {e}")
        raise