From 58d15ed385c8fb8e5f9740c23a331a37cf7a9870 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 23 Feb 2026 21:27:56 +0100 Subject: [PATCH] WIP --- changedetectionio/blueprint/ui/edit.py | 3 +- changedetectionio/llm/plugin.py | 5 +- changedetectionio/llm/queue_worker.py | 128 +++++++++++++----- changedetectionio/llm/settings_form.py | 15 +- .../llm/templates/settings-llm.html | 11 +- changedetectionio/llm/tokens.py | 22 ++- changedetectionio/static/js/llm.js | 26 +++- 7 files changed, 153 insertions(+), 57 deletions(-) diff --git a/changedetectionio/blueprint/ui/edit.py b/changedetectionio/blueprint/ui/edit.py index 22d6ed51..84c43514 100644 --- a/changedetectionio/blueprint/ui/edit.py +++ b/changedetectionio/blueprint/ui/edit.py @@ -409,6 +409,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe def watch_regenerate_llm_summaries(uuid): """Queue LLM summary generation for all history entries that don't yet have one.""" from flask import flash + from changedetectionio.llm.tokens import is_llm_data_ready watch = datastore.data['watching'].get(uuid) if not watch: abort(404) @@ -427,7 +428,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe snapshot_id = os.path.basename(snapshot_fname).split('.')[0] # always 32-char MD5 # Skip entries that already have a summary - if os.path.exists(os.path.join(watch.data_dir, f"{snapshot_id}-llm.txt")): + if is_llm_data_ready(watch.data_dir, snapshot_id): continue llm_summary_q.put({ diff --git a/changedetectionio/llm/plugin.py b/changedetectionio/llm/plugin.py index 66f1ca4e..d60070fe 100644 --- a/changedetectionio/llm/plugin.py +++ b/changedetectionio/llm/plugin.py @@ -48,8 +48,9 @@ def save_llm_settings(datastore, plugin_form): so they are never persisted to llm.json.""" from changedetectionio.pluggy_interface import save_plugin_settings data = { - 'llm_connection': plugin_form.llm_connection.data, - 'llm_summary_prompt': plugin_form.llm_summary_prompt.data or '', + 'llm_connection': plugin_form.llm_connection.data, + 'llm_summary_prompt': plugin_form.llm_summary_prompt.data or '', + 'llm_diff_context_lines': plugin_form.llm_diff_context_lines.data or 2, } save_plugin_settings(datastore.datastore_path, 'llm', data) diff --git a/changedetectionio/llm/queue_worker.py b/changedetectionio/llm/queue_worker.py index d902e073..5e227c68 100644 --- a/changedetectionio/llm/queue_worker.py +++ b/changedetectionio/llm/queue_worker.py @@ -1,8 +1,10 @@ +import fcntl import os import queue import re import threading import time +from datetime import datetime, timezone from loguru import logger from changedetectionio.llm.tokens import ( @@ -94,7 +96,30 @@ def _read_snapshot(watch, snapshot_fname): return f.read() -def _call_llm(model, messages, api_key=None, api_base=None, max_tokens=600, conn_id=None, tpm=0): +def _append_llm_log(log_path, model, sent_tokens, recv_tokens, elapsed_ms): + """Append one line to the datastore-level LLM activity log. + + Line format (tab-separated, LF terminated): + ISO-8601-UTC fetched LLM via sent= recv= ms= + + The file is flock-locked for the duration of the write so concurrent + workers don't interleave lines. 
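+
+    A concrete example of one emitted line (values and model name are
+    illustrative only; the authoritative format is the f-string below,
+    with tab characters written here as \t):
+
+        2026-02-23T20:27:56.123\tfetched LLM via openai/gpt-4o\tsent=1523\trecv=210\tms=842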
+ """ + ts = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] # ms precision + line = f"{ts}\tfetched LLM via {model}\tsent={sent_tokens}\trecv={recv_tokens}\tms={elapsed_ms}\n" + try: + with open(log_path, 'a', encoding='utf-8', newline='\n') as f: + fcntl.flock(f, fcntl.LOCK_EX) + try: + f.write(line) + f.flush() + finally: + fcntl.flock(f, fcntl.LOCK_UN) + except Exception as exc: + logger.warning(f"LLM log write failed: {exc}") + + +def _call_llm(model, messages, api_key=None, api_base=None, max_tokens=600, conn_id=None, tpm=0, log_path=None): """ Thin wrapper around litellm.completion. Isolated as a named function so tests can mock.patch it without importing litellm. @@ -118,6 +143,8 @@ def _call_llm(model, messages, api_key=None, api_base=None, max_tokens=600, conn check is performed before calling the API. Raises _RateLimitWait if the bucket is empty so the worker can re-queue without retrying. + log_path — when set, each call is appended to the datastore LLM activity log. + Returns the response text string. """ import litellm @@ -143,21 +170,19 @@ def _call_llm(model, messages, api_key=None, api_base=None, max_tokens=600, conn if api_base: kwargs['api_base'] = api_base + t0 = time.monotonic() response = litellm.completion(**kwargs) + elapsed_ms = round((time.monotonic() - t0) * 1000) + + if log_path: + usage = getattr(response, 'usage', None) + sent_tok = getattr(usage, 'prompt_tokens', 0) or 0 + recv_tok = getattr(usage, 'completion_tokens', 0) or 0 + _append_llm_log(log_path, model, sent_tok, recv_tok, elapsed_ms) + return response.choices[0].message.content.strip() -def _write_summary(watch_dir, snapshot_id, text): - """Write the LLM summary to {snapshot_id}-llm.txt alongside the snapshot.""" - dest = os.path.join(watch_dir, f"{snapshot_id}-llm.txt") - tmp = dest + '.tmp' - with open(tmp, 'w', encoding='utf-8') as f: - f.write(text) - f.flush() - os.fsync(f.fileno()) - os.replace(tmp, dest) - return dest - def _resolve_llm_connection(watch, datastore): """Return (model, api_key, api_base, conn_id, tpm) for the given watch. @@ -206,6 +231,33 @@ SYSTEM_PROMPT = ( ) +def _build_context_header(watch, datastore): + """Return a short multi-line string describing what this watch monitors. + + Included lines (only when non-empty / non-redundant): + URL: + Monitor: (omitted when same as URL) + Tags: (omitted when none) + """ + url = watch.get('url', '') + title = watch.get('title', '') or watch.get('page_title', '') + + lines = [f"URL: {url}"] + if title and title != url: + lines.append(f"Monitor: {title}") + + tag_titles = [] + for tag_uuid in (watch.get('tags') or []): + tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid, {}) + t = tag.get('title', '').strip() + if t: + tag_titles.append(t) + if tag_titles: + lines.append(f"Tags: {', '.join(tag_titles)}") + + return '\n'.join(lines) + + def _chunk_lines(lines, model, chunk_token_size): """Split lines into chunks that each fit within chunk_token_size tokens.""" import litellm @@ -222,7 +274,7 @@ def _chunk_lines(lines, model, chunk_token_size): return chunks -def _enumerate_changes(diff_text, url, model, llm_kwargs): +def _enumerate_changes(diff_text, context_header, model, llm_kwargs): """ Pass 1 — ask the model to list every distinct change exhaustively, one per line. Returns a plain-text list string. 
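+
+    Expected shape of the returned text (illustrative only — the actual
+    items depend entirely on the diff content and the model's judgement):
+
+        - Price changed from $10.00 to $12.50
+        - "In stock" badge removed from the product header
+        - New "Shipping" section added at the bottom of the page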
@@ -233,7 +285,7 @@ def _enumerate_changes(diff_text, url, model, llm_kwargs): { 'role': 'user', 'content': ( - f"URL: {url}\n" + f"{context_header}\n" f"Diff:\n{diff_text}\n\n" "List every distinct change you see, one item per line. " "Be exhaustive — do not filter or prioritise. " @@ -245,7 +297,7 @@ def _enumerate_changes(diff_text, url, model, llm_kwargs): return _call_llm(model=model, messages=messages, max_tokens=1200, **llm_kwargs) -def _summarise_enumeration(enumerated, url, model, llm_kwargs, summary_instruction=None): +def _summarise_enumeration(enumerated, context_header, model, llm_kwargs, summary_instruction=None): """ Pass 2 — compress the exhaustive enumeration into the final output. Operates on a small, structured input so nothing is lost that wasn't already listed. @@ -260,7 +312,7 @@ def _summarise_enumeration(enumerated, url, model, llm_kwargs, summary_instructi { 'role': 'user', 'content': ( - f"URL: {url}\n" + f"{context_header}\n" f"All changes detected:\n{enumerated}\n\n" + instruction ), @@ -318,23 +370,14 @@ def process_llm_summary(item, datastore): before_text = _read_snapshot(watch, history[history_keys[idx - 1]]) current_text = _read_snapshot(watch, history[history_keys[idx]]) - diff_lines = list(difflib.unified_diff( - before_text.splitlines(), - current_text.splitlines(), - lineterm='', - n=2, - )) - diff_text = '\n'.join(diff_lines) - - if not diff_text.strip(): - logger.debug(f"LLM: no diff content for {uuid}/{snapshot_id}, skipping") - return - # Resolve model / credentials via connections table (with legacy flat-field fallback) model, api_key, api_base, conn_id, tpm = _resolve_llm_connection(watch, datastore) - url = watch.get('url', '') + url = watch.get('url', '') + context_header = _build_context_header(watch, datastore) - llm_kwargs = {} + llm_kwargs = { + 'log_path': os.path.join(datastore.datastore_path, 'llm-log.txt'), + } if api_key: llm_kwargs['api_key'] = api_key if api_base: @@ -344,13 +387,26 @@ def process_llm_summary(item, datastore): if tpm: llm_kwargs['tpm'] = tpm - # Use custom prompt if configured, otherwise fall back to the built-in default + # Use custom prompt / context-line setting if configured from changedetectionio.llm.plugin import get_llm_settings llm_settings = get_llm_settings(datastore) custom_prompt = (llm_settings.get('llm_summary_prompt') or '').strip() summary_instruction = custom_prompt if custom_prompt else ( "Analyse all changes in this diff.\n\n" + STRUCTURED_OUTPUT_INSTRUCTION ) + context_n = int(llm_settings.get('llm_diff_context_lines') or 2) + + diff_lines = list(difflib.unified_diff( + before_text.splitlines(), + current_text.splitlines(), + lineterm='', + n=context_n, + )) + diff_text = '\n'.join(diff_lines) + + if not diff_text.strip(): + logger.debug(f"LLM: no diff content for {uuid}/{snapshot_id}, skipping") + return diff_tokens = litellm.token_counter(model=model, text=diff_text) logger.debug(f"LLM: diff is {diff_tokens} tokens for {uuid}/{snapshot_id}") @@ -362,7 +418,7 @@ def process_llm_summary(item, datastore): { 'role': 'user', 'content': ( - f"URL: {url}\n" + f"{context_header}\n" f"Diff:\n{diff_text}\n\n" + summary_instruction ), @@ -373,8 +429,8 @@ def process_llm_summary(item, datastore): elif diff_tokens < TOKEN_TWO_PASS_THRESHOLD: # Medium diff — two-pass: enumerate exhaustively, then compress - enumerated = _enumerate_changes(diff_text, url, model, llm_kwargs) - raw = _summarise_enumeration(enumerated, url, model, llm_kwargs, summary_instruction) + enumerated = _enumerate_changes(diff_text, 
context_header, model, llm_kwargs) + raw = _summarise_enumeration(enumerated, context_header, model, llm_kwargs, summary_instruction) strategy = 'two-pass' else: @@ -386,11 +442,11 @@ def process_llm_summary(item, datastore): for i, chunk in enumerate(chunks): logger.debug(f"LLM: enumerating chunk {i+1}/{len(chunks)}") chunk_enumerations.append( - _enumerate_changes(chunk, url, model, llm_kwargs) + _enumerate_changes(chunk, context_header, model, llm_kwargs) ) combined = '\n'.join(chunk_enumerations) - raw = _summarise_enumeration(combined, url, model, llm_kwargs, summary_instruction) + raw = _summarise_enumeration(combined, context_header, model, llm_kwargs, summary_instruction) strategy = 'map-reduce' llm_data = parse_llm_response(raw) diff --git a/changedetectionio/llm/settings_form.py b/changedetectionio/llm/settings_form.py index 53df1940..1762897a 100644 --- a/changedetectionio/llm/settings_form.py +++ b/changedetectionio/llm/settings_form.py @@ -44,7 +44,7 @@ class LLMConnectionEntryForm(Form): name = StringField(_l('Name'), validators=[Optional(), Length(max=100)]) model = StringField(_l('Model string'), validators=[Optional(), Length(max=200)]) api_key = StringField(_l('API Key'), validators=[Optional(), Length(max=500)]) - api_base = StringField(_l('Base URL'), validators=[Optional(), Length(max=500)]) + api_base = StringField(_l('API Endpoint'), validators=[Optional(), Length(max=500)]) tokens_per_minute = IntegerField(_l('Tokens/min'), validators=[Optional(), NumberRange(min=0, max=10_000_000)], default=0) is_default = BooleanField(_l('Default'), validators=[Optional()]) @@ -95,7 +95,7 @@ class LLMNewConnectionForm(Form): api_key = PasswordField(_l('API Key'), render_kw={'id': 'llm-add-key', 'size': 40, 'placeholder': 'sk-…', 'autocomplete': 'off'}) - api_base = StringField(_l('Base URL'), + api_base = StringField(_l('API Endpoint'), render_kw={'id': 'llm-add-base', 'size': 40, 'placeholder': 'http://localhost:11434', 'autocomplete': 'off'}) tokens_per_minute = IntegerField(_l('Tokens/min'), default=0, @@ -113,6 +113,17 @@ class LLMSettingsForm(Form): llm_connection = FieldList(FormField(LLMConnectionEntryForm), min_entries=0) new_connection = FormField(LLMNewConnectionForm) + llm_diff_context_lines = IntegerField( + _l('Diff context lines'), + validators=[Optional(), NumberRange(min=0, max=20)], + default=2, + description=_l( + 'Number of unchanged lines shown around each change in the diff. ' + 'More lines give the LLM more context but increase token usage. (default: 2)' + ), + render_kw={'style': 'width: 5em;', 'min': '0', 'max': '20'}, + ) + llm_summary_prompt = TextAreaField( _l('Summary prompt'), validators=[Optional()], diff --git a/changedetectionio/llm/templates/settings-llm.html b/changedetectionio/llm/templates/settings-llm.html index ba14ef3d..f97c4b9d 100644 --- a/changedetectionio/llm/templates/settings-llm.html +++ b/changedetectionio/llm/templates/settings-llm.html @@ -64,9 +64,9 @@ var LLM_I18N = { -
+