# Mirror of https://github.com/dgtlmoon/changedetection.io.git
# Synced 2026-05-01 15:20:33 +00:00 (149 lines, 6.1 KiB, Python)
"""
Prompt construction for LLM evaluation calls.
Pure functions — no side effects, fully testable.
"""
|
|
|
|
from .bm25_trim import trim_to_relevant
|
|
|
|
# Character budget handed to trim_to_relevant (as max_chars) when
# build_eval_prompt attaches an excerpt of the current page to the diff.
SNAPSHOT_CONTEXT_CHARS = 3_000 # current page state excerpt sent alongside the diff
|
|
|
|
|
|
def build_eval_prompt(intent: str, diff: str, current_snapshot: str = '',
                      url: str = '', title: str = '') -> str:
    """
    Assemble the user-role message for a diff evaluation request.

    The matching system-role text is produced separately by
    build_eval_system_prompt.

    Args:
        intent: What the user wants to be alerted about.
        diff: The textual diff to be judged against the intent.
        current_snapshot: Optional current page text; when given, it is run
            through trim_to_relevant with a SNAPSHOT_CONTEXT_CHARS budget
            and the result (if non-empty) is included as context.
        url: Optional page URL; emitted as the first line when truthy.
        title: Optional page title; emitted after the URL when truthy.

    Returns:
        The message sections joined with single newlines.
    """
    message = []
    if url:
        message += [f"URL: {url}"]
    if title:
        message += [f"Page title: {title}"]
    message += [f"Intent: {intent}"]

    # Only consult the trimmer when a snapshot was actually supplied.
    page_excerpt = ''
    if current_snapshot:
        page_excerpt = trim_to_relevant(
            current_snapshot, intent, max_chars=SNAPSHOT_CONTEXT_CHARS
        )
    if page_excerpt:
        message += [f"\nCurrent page state (relevant excerpt):\n{page_excerpt}"]

    # The diff always comes last, separated from the header by a blank line.
    message += [f"\nWhat changed (diff):\n{diff}"]
    return '\n'.join(message)
|
|
|
|
|
|
def build_eval_system_prompt() -> str:
    """
    Return the fixed system-role text for diff evaluation calls.

    The text tells the model to answer with a single JSON object carrying
    an "important" boolean and a one-sentence "summary", followed by the
    rules for deciding importance.
    """
    role = (
        "You evaluate website changes for a monitoring tool.\n"
        "Given an intent and a diff (added/removed lines), decide if the change matches the intent.\n\n"
    )
    response_format = (
        "Respond with ONLY a JSON object — no markdown, no explanation outside it:\n"
        '{"important": true/false, "summary": "one sentence describing the relevant change, or why it doesn\'t match"}\n\n'
    )
    rules = (
        "Rules:\n"
        "- important=true only when the diff clearly matches the intent\n"
        "- Empty, trivial, or cosmetic diffs (dates, counters, whitespace) → important=false\n"
        "- Use OR logic when intent lists multiple triggers\n"
        "- Summary must be in the same language as the intent\n"
        "- If important=false, summary briefly explains why it doesn't match"
    )
    return role + response_format + rules
|
|
|
|
|
|
# Default cap on how many characters of page content a preview call sends.
PREVIEW_CONTENT_CHARS = 6_000


def build_preview_prompt(intent: str, content: str, url: str = '', title: str = '',
                         *, max_chars: int = PREVIEW_CONTENT_CHARS) -> str:
    """
    Build the user message for a live-preview extraction call.

    Unlike build_eval_prompt (which analyses a diff), this asks the LLM to
    extract relevant information from the *current* page content — giving the
    user a direct answer to their intent so they can verify it makes sense
    before saving.

    Args:
        intent: The user's intent or question about the page.
        content: Current page text; only its leading ``max_chars``
            characters are included in the prompt.
        url: Optional page URL; emitted as the first line when truthy.
        title: Optional page title; emitted after the URL when truthy.
        max_chars: Keyword-only cap on the number of content characters
            sent. Defaults to PREVIEW_CONTENT_CHARS (6,000), which is the
            previously hard-coded limit. Negative values are treated as 0.

    Returns:
        The message sections joined with single newlines.
    """
    # Clamp so a negative cap cannot turn into a "drop the tail" slice.
    limit = max(0, max_chars)

    parts = []
    if url:
        parts.append(f"URL: {url}")
    if title:
        parts.append(f"Page title: {title}")
    parts.append(f"Intent / question: {intent}")
    parts.append(f"\nPage content:\n{content[:limit]}")
    return '\n'.join(parts)
|
|
|
|
|
|
def build_preview_system_prompt() -> str:
    """
    Return the fixed system-role text for live-preview extraction calls.

    The text tells the model to answer with a single JSON object carrying
    a "found" boolean and a short "answer", followed by the rules that
    govern both fields.
    """
    role = (
        "You are a web page content analyzer for a website monitoring tool.\n"
        "Given the user's intent or question and the current page content, "
        "extract and directly answer what the intent is looking for.\n\n"
    )
    response_format = (
        "Respond with ONLY a JSON object — no markdown, no explanation outside it:\n"
        '{"found": true/false, "answer": "concise direct answer or extraction"}\n\n'
    )
    rules = (
        "Rules:\n"
        "- found=true when the page contains something relevant to the intent\n"
        "- answer must directly address the intent (e.g. for 'how many articles?' → '30 articles listed')\n"
        "- answer must be in the same language as the intent\n"
        "- Keep answer brief — one sentence maximum"
    )
    return "".join((role, response_format, rules))
|
|
|
|
|
|
def build_change_summary_prompt(diff: str, custom_prompt: str,
                                current_snapshot: str = '', url: str = '', title: str = '') -> str:
    """
    Assemble the user-role message for an AI Change Summary request.

    The caller's own instructions (custom_prompt) are wrapped together with
    the diff and, when available, some page context.

    Args:
        diff: The textual diff to be summarised.
        custom_prompt: The user's instructions for the summary.
        current_snapshot: Optional current page text; when given, it is run
            through trim_to_relevant with a 2,000-character budget and the
            result (if non-empty) is included as context.
        url: Optional page URL; emitted as the first line when truthy.
        title: Optional page title; emitted after the URL when truthy.

    Returns:
        The message sections joined with single newlines.
    """
    def _sections():
        # Yields the sections in the order they appear in the final message.
        if url:
            yield f"URL: {url}"
        if title:
            yield f"Page title: {title}"
        yield f"Instructions: {custom_prompt}"
        if current_snapshot:
            page_excerpt = trim_to_relevant(current_snapshot, custom_prompt, max_chars=2_000)
            if page_excerpt:
                yield f"\nCurrent page (excerpt):\n{page_excerpt}"
        yield f"\nWhat changed (diff):\n{diff}"

    return '\n'.join(_sections())
|
|
|
|
|
|
def build_change_summary_system_prompt() -> str:
    """
    Return the fixed system-role text for AI Change Summary calls.

    The text tells the model to describe the change in plain language,
    to follow the user's formatting instructions, and to reply with the
    summary text only.
    """
    task = (
        "You summarise website changes for a monitoring notification.\n"
        "Given a diff of what changed and the user's formatting instructions, "
        "produce a concise plain-language description of the change.\n"
    )
    obey_user = "Follow the user's instructions exactly for format, language, and length.\n"
    output_shape = (
        "Respond with ONLY the summary text — no JSON, no markdown code fences, "
        "no preamble. Just the description."
    )
    return task + obey_user + output_shape
|
|
|
|
|
|
def build_setup_prompt(intent: str, snapshot_text: str, url: str = '') -> str:
    """
    Assemble the user-role message for the one-time setup call that decides
    whether a CSS pre-filter would improve evaluation precision.

    Args:
        intent: The monitoring intent supplied by the user.
        snapshot_text: Page text; it is run through trim_to_relevant with a
            4,000-character budget and the result is always included.
        url: Optional page URL; emitted as the first line when truthy.

    Returns:
        The message sections joined with single newlines.
    """
    page_excerpt = trim_to_relevant(snapshot_text, intent, max_chars=4_000)

    # The URL line is the only optional section in this message.
    url_lines = [f"URL: {url}"] if url else []
    return '\n'.join([
        *url_lines,
        f"Intent: {intent}",
        f"\nPage content excerpt:\n{page_excerpt}",
    ])
|
|
|
|
|
|
def build_setup_system_prompt() -> str:
    """
    Return the fixed system-role text for the one-time setup call.

    The text tells the model to answer with a single JSON object carrying
    "needs_prefilter", "selector" and "reason", followed by the rules for
    when a pre-filter is appropriate and which selectors are allowed.
    """
    role = (
        "You help configure a website change monitor.\n"
        "Given a monitoring intent and a sample of the page content, decide if a CSS pre-filter "
        "would improve evaluation precision by scoping the content to a specific structural section.\n\n"
    )
    response_format = (
        "Respond with ONLY a JSON object:\n"
        '{"needs_prefilter": true/false, "selector": "CSS selector or null", "reason": "one sentence"}\n\n'
    )
    when_to_filter = (
        "Rules:\n"
        "- Only recommend a pre-filter when the intent references a specific structural section "
        "(e.g. 'footer', 'sidebar', 'nav', 'header', 'main', 'article') OR the page clearly "
        "has high-noise sections unrelated to the intent\n"
    )
    allowed_selectors = (
        "- Use ONLY semantic element selectors: footer, nav, header, main, article, aside, "
        "or attribute-based like [id*='price'], [class*='sidebar'] — NEVER positional selectors "
        "like div:nth-child(3) or //*[2]\n"
    )
    defaults = (
        "- Default to needs_prefilter=false — most intents don't need one\n"
        "- selector must be null when needs_prefilter=false"
    )
    return "".join((role, response_format, when_to_filter, allowed_selectors, defaults))
|