# changedetection.io/changedetectionio/llm/prompt_builder.py

"""
Prompt construction for LLM evaluation calls.
Pure functions — no side effects, fully testable.
"""

from .bm25_trim import trim_to_relevant

# Value passed as max_chars when build_eval_prompt trims the current page
# snapshot into the excerpt that is sent alongside the diff.
SNAPSHOT_CONTEXT_CHARS = 3_000


def build_eval_prompt(intent: str, diff: str, current_snapshot: str = '',
                      url: str = '', title: str = '') -> str:
    """
    Assemble the user message for a diff evaluation call.

    Sections appear in a fixed order -- URL, page title, intent, an optional
    excerpt of the current page, then the diff -- and are joined with
    newlines.  The URL, title and snapshot sections are left out when their
    argument is empty.  The system prompt is built separately by
    build_eval_system_prompt().
    """
    header = [
        f"{label}: {value}"
        for label, value in (("URL", url), ("Page title", title))
        if value
    ]
    header.append(f"Intent: {intent}")

    # trim_to_relevant is only called when a snapshot was supplied; if it
    # hands back nothing, the excerpt section is dropped entirely.
    page_excerpt = (
        trim_to_relevant(current_snapshot, intent, max_chars=SNAPSHOT_CONTEXT_CHARS)
        if current_snapshot
        else ''
    )

    body = []
    if page_excerpt:
        body.append(f"\nCurrent page state (relevant excerpt):\n{page_excerpt}")
    body.append(f"\nWhat changed (diff):\n{diff}")

    return '\n'.join(header + body)


def build_eval_system_prompt() -> str:
    """
    Return the fixed system prompt for diff evaluation calls.

    The text tells the model to answer with a single JSON object carrying
    an "important" flag and a one-sentence "summary", followed by the rules
    for deciding whether a diff matches the intent.
    """
    reply_schema = (
        '{"important": true/false, "summary": "one sentence describing '
        'the relevant change, or why it doesn\'t match"}'
    )
    rules = [
        "- important=true only when the diff clearly matches the intent",
        "- Empty, trivial, or cosmetic diffs (dates, counters, whitespace) → important=false",
        "- Use OR logic when intent lists multiple triggers",
        "- Summary must be in the same language as the intent",
        "- If important=false, summary briefly explains why it doesn't match",
    ]
    # Empty entries become the blank lines separating the three sections.
    lines = [
        "You evaluate website changes for a monitoring tool.",
        "Given an intent and a diff (added/removed lines), decide if the change matches the intent.",
        "",
        "Respond with ONLY a JSON object — no markdown, no explanation outside it:",
        reply_schema,
        "",
        "Rules:",
    ]
    return '\n'.join(lines + rules)


def build_preview_prompt(intent: str, content: str, url: str = '', title: str = '',
                         *, max_content_chars: int = 6_000) -> str:
    """
    Build the user message for a live-preview extraction call.

    Unlike build_eval_prompt (which analyses a diff), this asks the LLM to
    extract relevant information from the *current* page content, giving the
    user a direct answer to their intent so they can verify it makes sense
    before saving.

    Args:
        intent: The user's intent or question about the page.
        content: Current page text.  None is treated as empty content
            rather than raising.
        url: Optional page URL; its line is omitted when empty.
        title: Optional page title; its line is omitted when empty.
        max_content_chars: Keyword-only cap on how many leading characters
            of ``content`` are included.  Defaults to 6000; negative values
            are treated as 0.

    Returns:
        The newline-joined prompt text.
    """
    parts = []
    if url:
        parts.append(f"URL: {url}")
    if title:
        parts.append(f"Page title: {title}")
    parts.append(f"Intent / question: {intent}")

    # Clamp the limit so a negative value cannot turn the slice into
    # "everything except the last N characters".
    limit = max(max_content_chars, 0)
    page_text = (content or '')[:limit]
    parts.append(f"\nPage content:\n{page_text}")
    return '\n'.join(parts)


def build_preview_system_prompt() -> str:
    """
    Return the fixed system prompt for live-preview extraction calls.

    The text tells the model to answer with a single JSON object carrying
    a "found" flag and a brief "answer", followed by the rules for filling
    them in.
    """
    reply_schema = '{"found": true/false, "answer": "concise direct answer or extraction"}'
    rules = (
        "- found=true when the page contains something relevant to the intent",
        "- answer must directly address the intent (e.g. for 'how many articles?' → '30 articles listed')",
        "- answer must be in the same language as the intent",
        "- Keep answer brief — one sentence maximum",
    )
    # Empty entries become the blank lines separating the three sections.
    lines = [
        "You are a web page content analyzer for a website monitoring tool.",
        "Given the user's intent or question and the current page content, "
        "extract and directly answer what the intent is looking for.",
        "",
        "Respond with ONLY a JSON object — no markdown, no explanation outside it:",
        reply_schema,
        "",
        "Rules:",
        *rules,
    ]
    return '\n'.join(lines)


def build_change_summary_prompt(diff: str, custom_prompt: str,
                                current_snapshot: str = '', url: str = '', title: str = '') -> str:
    """
    Assemble the user message for an AI Change Summary call.

    The caller's own instructions (custom_prompt) are wrapped with optional
    URL / title lines, an optional excerpt of the current page, and the
    diff, in that order, joined with newlines.
    """
    sections = []
    for label, value in (("URL", url), ("Page title", title)):
        if value:
            sections.append(f"{label}: {value}")
    sections.append(f"Instructions: {custom_prompt}")

    # trim_to_relevant is only called when a snapshot was supplied; the
    # excerpt section is skipped if nothing comes back.
    page_excerpt = None
    if current_snapshot:
        page_excerpt = trim_to_relevant(current_snapshot, custom_prompt, max_chars=2_000)
    if page_excerpt:
        sections.append(f"\nCurrent page (excerpt):\n{page_excerpt}")

    sections.append(f"\nWhat changed (diff):\n{diff}")
    return '\n'.join(sections)


def build_change_summary_system_prompt() -> str:
    """
    Return the fixed system prompt for AI Change Summary calls.

    The text asks for a plain-text summary that follows the user's
    formatting instructions, with no JSON, code fences or preamble.
    """
    sentences = [
        "You summarise website changes for a monitoring notification.",
        "Given a diff of what changed and the user's formatting instructions, "
        "produce a concise plain-language description of the change.",
        "Follow the user's instructions exactly for format, language, and length.",
        "Respond with ONLY the summary text — no JSON, no markdown code fences, "
        "no preamble. Just the description.",
    ]
    return '\n'.join(sentences)


def build_setup_prompt(intent: str, snapshot_text: str, url: str = '') -> str:
    """
    Assemble the user message for the one-time setup call that decides
    whether a CSS pre-filter would improve evaluation precision.

    The snapshot is passed through trim_to_relevant with max_chars=4_000 and
    the result is always included, after an optional URL line and the intent.
    """
    page_excerpt = trim_to_relevant(snapshot_text, intent, max_chars=4_000)

    lines = [f"URL: {url}"] if url else []
    lines += [
        f"Intent: {intent}",
        f"\nPage content excerpt:\n{page_excerpt}",
    ]
    return '\n'.join(lines)


def build_setup_system_prompt() -> str:
    """
    Return the fixed system prompt for the one-time setup call.

    The text tells the model to answer with a single JSON object carrying
    "needs_prefilter", "selector" and "reason", followed by the rules for
    when a CSS pre-filter may be recommended and which selectors are allowed.
    """
    intro = (
        "You help configure a website change monitor.",
        "Given a monitoring intent and a sample of the page content, decide if a CSS pre-filter "
        "would improve evaluation precision by scoping the content to a specific structural section.",
    )
    reply_format = (
        "Respond with ONLY a JSON object:",
        '{"needs_prefilter": true/false, "selector": "CSS selector or null", "reason": "one sentence"}',
    )
    rules = (
        "Rules:",
        "- Only recommend a pre-filter when the intent references a specific structural section "
        "(e.g. 'footer', 'sidebar', 'nav', 'header', 'main', 'article') OR the page clearly "
        "has high-noise sections unrelated to the intent",
        "- Use ONLY semantic element selectors: footer, nav, header, main, article, aside, "
        "or attribute-based like [id*='price'], [class*='sidebar'] — NEVER positional selectors "
        "like div:nth-child(3) or //*[2]",
        "- Default to needs_prefilter=false — most intents don't need one",
        "- selector must be null when needs_prefilter=false",
    )
    # Lines within a section are newline-separated; sections are separated
    # by one blank line.
    return '\n\n'.join('\n'.join(section) for section in (intro, reply_format, rules))