"""
|
|
Thin wrapper around litellm.completion.
|
|
Keeps litellm import isolated so the rest of the codebase doesn't depend on it directly,
|
|
and makes the call easy to mock in tests.
|
|
"""

import os

from loguru import logger

# Default output token cap for JSON-returning calls (intent eval, preview, setup).
# These return small JSON objects — 400 is enough for a verbose explanation while
# still preventing runaway cost. Change summaries pass their own max_tokens via
# _summary_max_tokens() and are NOT subject to this cap.
_MAX_COMPLETION_TOKENS = 400

DEFAULT_TIMEOUT = int(os.getenv('LLM_TIMEOUT', 60))
DEFAULT_RETRIES = 3
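
# The timeout can be raised without a code change, e.g. for slow self-hosted
# models. Shell sketch (the value 120 is only an illustration):
#
#     export LLM_TIMEOUT=120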


def completion(model: str, messages: list, api_key: str = None,
               api_base: str = None, timeout: int = DEFAULT_TIMEOUT,
               max_tokens: int = None, extra_body: dict = None) -> tuple[str, int, int, int]:
    """
    Call the LLM and return (response_text, total_tokens, input_tokens, output_tokens).

    Retries up to DEFAULT_RETRIES times on timeout or connection errors.
    Token counts are 0 if the provider doesn't return usage data.
    Raises on network/auth errors — callers handle gracefully.
    """
    try:
        import litellm
    except ImportError:
        raise RuntimeError("litellm is not installed. Add it to requirements.txt.")

    _timeout = timeout if timeout is not None else DEFAULT_TIMEOUT

    kwargs = {
        'model': model,
        'messages': messages,
        'timeout': _timeout,
        'temperature': 0,
        'max_tokens': max_tokens if max_tokens is not None else _MAX_COMPLETION_TOKENS,
    }
    if api_key:
        kwargs['api_key'] = api_key
    if api_base:
        kwargs['api_base'] = api_base
    if extra_body:
        kwargs['extra_body'] = extra_body

    _retryable = (litellm.Timeout, litellm.APIConnectionError)

    for attempt in range(1, DEFAULT_RETRIES + 1):
        try:
            response = litellm.completion(**kwargs)
            choice = response.choices[0]
            message = choice.message
            finish = getattr(choice, 'finish_reason', None)

            text = message.content or ''

            if not text:
                # Some providers (e.g. Gemini) put text in message.parts instead of .content
                parts = getattr(message, 'parts', None)
                if parts:
                    text = ''.join(getattr(p, 'text', '') or '' for p in parts).strip()
                    logger.debug(f"LLM client: extracted text from message.parts ({len(parts)} parts) model={model!r}")

            if finish == 'length':
                logger.warning(
                    f"LLM client: response truncated (finish_reason='length') model={model!r} "
                    f"— increase max_tokens; got {len(text)} chars so far"
                )

            if not text:
                logger.warning(
                    f"LLM client: empty content from model={model!r} "
                    f"finish_reason={finish!r} "
                    f"message={message!r}"
                )

            usage = getattr(response, 'usage', None)
            input_tokens = int(getattr(usage, 'prompt_tokens', 0) or 0) if usage else 0
            output_tokens = int(getattr(usage, 'completion_tokens', 0) or 0) if usage else 0
            total_tokens = int(getattr(usage, 'total_tokens', 0) or 0) if usage else (input_tokens + output_tokens)
            logger.debug(
                f"LLM client: model={model!r} finish={finish!r} "
                f"tokens={total_tokens} (in={input_tokens} out={output_tokens}) "
                f"text_len={len(text)}"
            )
            return text, total_tokens, input_tokens, output_tokens

        except _retryable as e:
            # litellm formats its Timeout message with None when the provider doesn't
            # propagate the timeout value — patch the exception args in-place so every
            # caller that logs str(e) sees the real number.
            _fix = f'after {_timeout} seconds'
            try:
                e.args = tuple(str(a).replace('after None seconds', _fix) for a in e.args)
            except Exception:
                pass
            if attempt < DEFAULT_RETRIES:
                logger.warning(
                    f"LLM call timed out/connection error (attempt {attempt}/{DEFAULT_RETRIES}), "
                    f"retrying — model={model!r} timeout={_timeout}s error={e}"
                )
                continue
            logger.warning(
                f"LLM call failed after {DEFAULT_RETRIES} attempts ({_timeout}s timeout) "
                f"model={model!r} error={e}"
            )
            raise

        except Exception as e:
            logger.warning(f"LLM call failed: model={model!r} error={e}")
            raise
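

# A hedged sketch of mocking this wrapper in tests, per the docstring's "easy to
# mock" intent. The patch target 'changedetectionio.llm.completion' is an assumed
# import path; substitute this module's real dotted path:
#
#     from unittest.mock import patch
#
#     with patch('changedetectionio.llm.completion',
#                return_value=('{"ok": true}', 42, 30, 12)):
#         ...  # exercise code that calls completion()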