Files
changedetection.io/changedetectionio/llm/prompt_builder.py
T
dgtlmoon c2f06f574b WIP
2026-04-16 14:41:30 +02:00

149 lines
6.1 KiB
Python

"""
Prompt construction for LLM evaluation calls.
Pure functions — no side effects, fully testable.
"""
from .bm25_trim import trim_to_relevant
# Size budget for the current-page excerpt included in the diff-evaluation
# prompt: build_eval_prompt passes this value as max_chars to trim_to_relevant().
SNAPSHOT_CONTEXT_CHARS = 3_000 # current page state excerpt sent alongside the diff
def build_eval_prompt(intent: str, diff: str, current_snapshot: str = '',
                      url: str = '', title: str = '') -> str:
    """
    Assemble the user message for a diff evaluation call.

    The message is a newline-joined list of sections, in this order:
    optional URL, optional page title, the intent, an optional excerpt of
    the current page, and finally the diff. The system prompt is not part
    of this message (see build_eval_system_prompt).

    Args:
        intent: Text placed after "Intent:"; also handed to trim_to_relevant
            together with the snapshot.
        diff: Diff text, inserted verbatim as the last section.
        current_snapshot: Current page text. When empty, no excerpt section
            is produced and trim_to_relevant is not called.
        url: Optional URL line; omitted when empty.
        title: Optional page-title line; omitted when empty.

    Returns:
        The assembled prompt string.
    """
    # Only consult the trimmer when a snapshot was actually supplied.
    relevant = (
        trim_to_relevant(current_snapshot, intent, max_chars=SNAPSHOT_CONTEXT_CHARS)
        if current_snapshot else ''
    )

    # Optional sections collapse to '' and are filtered out below; every
    # section that is kept starts with a fixed label, so it is never empty.
    sections = (
        f"URL: {url}" if url else '',
        f"Page title: {title}" if title else '',
        f"Intent: {intent}",
        f"\nCurrent page state (relevant excerpt):\n{relevant}" if relevant else '',
        f"\nWhat changed (diff):\n{diff}",
    )
    return '\n'.join(section for section in sections if section)
def build_eval_system_prompt() -> str:
    """
    Return the fixed system prompt used for diff evaluation calls.

    The prompt tells the model to judge whether a diff matches the user's
    intent and to reply with a single JSON object carrying the keys
    "important" and "summary", followed by the rules for filling them in.
    """
    task = (
        "You evaluate website changes for a monitoring tool.\n"
        "Given an intent and a diff (added/removed lines), decide if the change matches the intent.\n\n"
    )
    response_format = (
        "Respond with ONLY a JSON object — no markdown, no explanation outside it:\n"
        '{"important": true/false, "summary": "one sentence describing the relevant change, or why it doesn\'t match"}\n\n'
    )
    rules = (
        "Rules:\n"
        "- important=true only when the diff clearly matches the intent\n"
        "- Empty, trivial, or cosmetic diffs (dates, counters, whitespace) → important=false\n"
        "- Use OR logic when intent lists multiple triggers\n"
        "- Summary must be in the same language as the intent\n"
        "- If important=false, summary briefly explains why it doesn't match"
    )
    return task + response_format + rules
def build_preview_prompt(intent: str, content: str, url: str = '', title: str = '',
                         *, max_content_chars: int = 6_000) -> str:
    """
    Build the user message for a live-preview extraction call.

    Unlike build_eval_prompt (which analyses a diff), this asks the LLM to
    extract relevant information from the *current* page content — giving the
    user a direct answer to their intent so they can verify it makes sense
    before saving.

    Args:
        intent: The user's intent or question, placed after
            "Intent / question:".
        content: Current page content. Only its leading
            max_content_chars characters are included.
        url: Optional URL line; omitted when empty.
        title: Optional page-title line; omitted when empty.
        max_content_chars: Keyword-only upper bound on how much of content
            is sent. Defaults to 6_000.

    Returns:
        The assembled prompt string, sections joined by newlines.

    Raises:
        ValueError: If max_content_chars is negative (a negative slice bound
            would silently drop the tail of the content instead of capping it).
    """
    if max_content_chars < 0:
        raise ValueError("max_content_chars must be non-negative")

    parts = []
    if url:
        parts.append(f"URL: {url}")
    if title:
        parts.append(f"Page title: {title}")
    parts.append(f"Intent / question: {intent}")
    # Plain prefix truncation — the content is cut at the limit, not
    # relevance-trimmed.
    parts.append(f"\nPage content:\n{content[:max_content_chars]}")
    return '\n'.join(parts)
def build_preview_system_prompt() -> str:
    """
    Return the fixed system prompt used for live-preview extraction calls.

    The prompt asks the model to answer the user's intent from the current
    page content and to reply with a single JSON object carrying the keys
    "found" and "answer", followed by the rules for filling them in.
    """
    return (
        "You are a web page content analyzer for a website monitoring tool.\n"
        "Given the user's intent or question and the current page content, "
        "extract and directly answer what the intent is looking for.\n\n"
        "Respond with ONLY a JSON object — no markdown, no explanation outside it:\n"
        '{"found": true/false, "answer": "concise direct answer or extraction"}\n\n'
        "Rules:\n"
        "- found=true when the page contains something relevant to the intent\n"
        # The example needs a visible separator between question and answer;
        # without it the two quoted phrases run together and read as one.
        "- answer must directly address the intent (e.g. for 'how many articles?' → '30 articles listed')\n"
        "- answer must be in the same language as the intent\n"
        "- Keep answer brief — one sentence maximum"
    )
def build_change_summary_prompt(diff: str, custom_prompt: str,
                                current_snapshot: str = '', url: str = '', title: str = '') -> str:
    """
    Assemble the user message for an AI Change Summary call.

    The caller's own instructions (custom_prompt) are wrapped with optional
    page metadata, an optional excerpt of the current page, and the diff.
    Sections are joined with newlines in that order.

    Args:
        diff: Diff text, inserted verbatim as the last section.
        custom_prompt: User-supplied instructions, placed after
            "Instructions:"; also handed to trim_to_relevant together with
            the snapshot.
        current_snapshot: Current page text. When empty, no excerpt section
            is produced and trim_to_relevant is not called.
        url: Optional URL line; omitted when empty.
        title: Optional page-title line; omitted when empty.

    Returns:
        The assembled prompt string.
    """
    # `value and f"..."` yields the falsy value itself when the field is
    # missing, so it is dropped by the filter and never formatted.
    optional_header = (url and f"URL: {url}", title and f"Page title: {title}")
    message = [line for line in optional_header if line]
    message.append(f"Instructions: {custom_prompt}")

    # Short-circuit keeps the trimmer from running on an empty snapshot.
    page_excerpt = current_snapshot and trim_to_relevant(
        current_snapshot, custom_prompt, max_chars=2_000
    )
    if page_excerpt:
        message.append(f"\nCurrent page (excerpt):\n{page_excerpt}")

    message.append(f"\nWhat changed (diff):\n{diff}")
    return '\n'.join(message)
def build_change_summary_system_prompt() -> str:
    """
    Return the fixed system prompt used for AI Change Summary calls.

    The prompt asks for a concise plain-language description of the change
    that follows the user's formatting instructions, returned as bare text
    rather than JSON or fenced markdown.
    """
    fragments = (
        "You summarise website changes for a monitoring notification.\n",
        "Given a diff of what changed and the user's formatting instructions, ",
        "produce a concise plain-language description of the change.\n",
        "Follow the user's instructions exactly for format, language, and length.\n",
        "Respond with ONLY the summary text — no JSON, no markdown code fences, ",
        "no preamble. Just the description.",
    )
    return ''.join(fragments)
def build_setup_prompt(intent: str, snapshot_text: str, url: str = '') -> str:
    """
    Assemble the user message for the one-time setup call that decides
    whether a CSS pre-filter would improve evaluation precision.

    Args:
        intent: Monitoring intent, placed after "Intent:"; also handed to
            trim_to_relevant together with the snapshot.
        snapshot_text: Page text; always passed through trim_to_relevant
            with max_chars=4_000.
        url: Optional URL line; omitted when empty.

    Returns:
        The assembled prompt string, sections joined by newlines. The
        "Page content excerpt" section is always present, whatever the
        trimmer returns.
    """
    page_sample = trim_to_relevant(snapshot_text, intent, max_chars=4_000)

    lines = [f"URL: {url}"] if url else []
    lines += [
        f"Intent: {intent}",
        f"\nPage content excerpt:\n{page_sample}",
    ]
    return '\n'.join(lines)
def build_setup_system_prompt() -> str:
    """
    Return the fixed system prompt used for the one-time setup call.

    The prompt asks the model whether a CSS pre-filter would help for the
    given intent and page sample, and to reply with a single JSON object
    carrying the keys "needs_prefilter", "selector" and "reason", followed
    by the rules constraining when and how a selector may be suggested.
    """
    task = (
        "You help configure a website change monitor.\n"
        "Given a monitoring intent and a sample of the page content, decide if a CSS pre-filter "
        "would improve evaluation precision by scoping the content to a specific structural section.\n\n"
    )
    response_format = (
        "Respond with ONLY a JSON object:\n"
        '{"needs_prefilter": true/false, "selector": "CSS selector or null", "reason": "one sentence"}\n\n'
    )
    rules = (
        "Rules:\n"
        "- Only recommend a pre-filter when the intent references a specific structural section "
        "(e.g. 'footer', 'sidebar', 'nav', 'header', 'main', 'article') OR the page clearly "
        "has high-noise sections unrelated to the intent\n"
        "- Use ONLY semantic element selectors: footer, nav, header, main, article, aside, "
        "or attribute-based like [id*='price'], [class*='sidebar'] — NEVER positional selectors "
        "like div:nth-child(3) or //*[2]\n"
        "- Default to needs_prefilter=false — most intents don't need one\n"
        "- selector must be null when needs_prefilter=false"
    )
    return task + response_format + rules