Compare commits

...

2 Commits

Author SHA1 Message Date
dgtlmoon 39fa7f9692 WIP 2026-05-30 13:35:49 +02:00
dgtlmoon 1e643b2244 LLM - Enrichen summary and intent/rules with the actual product metadata in the HTML document if it exists for greater precision. 2026-05-30 13:26:01 +02:00
10 changed files with 400 additions and 23 deletions
+9 -2
View File
@@ -267,9 +267,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
from changedetectionio.llm.evaluator import (
summarise_change, get_effective_summary_prompt, build_summary_cache_prompt,
is_global_token_budget_exceeded, get_global_token_budget_month,
LLMInputTooLargeError,
LLMInputTooLargeError, compute_llm_enrichment,
)
# Structured-metadata enrichment from the raw HTML of the "to" version (only the
# 2 newest fetched-HTML snapshots are retained; older pairs simply get no enrichment).
# Must be computed the same way as the worker pre-cache so the cache key matches.
_llm_raw_html = watch.get_fetched_html(to_version) or ''
_llm_metadata = compute_llm_enrichment(watch, datastore, _llm_raw_html, diff_text)
# Diff-pref flags + system prompt + active model are part of the cache key
# so prompt or model changes bust the cache.
from changedetectionio.llm.evaluator import get_llm_settings
@@ -281,6 +287,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
max_summary_tokens=_max_summary_tokens,
prefs=prefs,
model=_llm_model,
metadata=_llm_metadata,
)
# Check cache — keyed by version pair + prompt hash (invalidates if prompt changes)
@@ -306,7 +313,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
}), 429
try:
summary = summarise_change(watch, datastore, diff=diff_text, current_snapshot=to_text)
summary = summarise_change(watch, datastore, diff=diff_text, current_snapshot=to_text, metadata=_llm_metadata)
except LLMInputTooLargeError as e:
return jsonify({'summary': None, 'error': str(e)}), 400
except Exception as e:
+53 -5
View File
@@ -68,6 +68,43 @@ def _get_max_input_chars(datastore) -> int:
return _DEFAULT_MAX_INPUT_CHARS
def compute_llm_enrichment(watch, datastore, raw_html: str, base_text: str) -> str:
    """
    Return the enrichment text to append to an LLM prompt, or '' when none applies.

    The text is whatever collect_llm_context_enrichment(watch, raw_html, datastore)
    yields. An empty string is returned instead when:

      * raw_html is empty,
      * the collection call raises (the error is logged at debug level),
      * nothing was collected, or
      * len(base_text) + len(enrichment) is greater than the value returned by
        _get_max_input_chars(datastore). In that case the enrichment is dropped
        as a whole; it is never truncated.

    Args:
        watch: The watch being processed; only watch.get('uuid') is read here,
            for log messages.
        datastore: Passed through to the collector and to _get_max_input_chars().
        raw_html: Raw HTML to extract enrichment from (may be '' / None).
        base_text: The text the enrichment would accompany; only its length is
            used, for the budget check. None is treated as ''.

    Returns:
        The collected enrichment string, or '' if it is unavailable or over budget.
    """
    if not raw_html:
        return ''

    try:
        # Imported at call time rather than at module level, as in the original.
        from changedetectionio.pluggy_interface import collect_llm_context_enrichment
        enrichment = collect_llm_context_enrichment(watch, raw_html, datastore)
    except Exception as err:
        # Best effort: a failing collector must not break the LLM call itself.
        logger.debug(f"{watch.get('uuid')} - LLM - enrichment collection failed: {err}")
        return ''

    if not enrichment:
        return ''

    budget = _get_max_input_chars(datastore)
    base_len = len(base_text or '')
    meta_len = len(enrichment)
    # NOTE(review): len() counts characters; the log text below calls them "bytes".
    over_budget = base_len + meta_len > budget

    if not over_budget:
        logger.debug(f"{watch.get('uuid')} - LLM - enrichening query/prompt with {meta_len} bytes of metadata")
        return enrichment

    logger.debug(
        f"{watch.get('uuid')} - LLM - enrichment of {meta_len} bytes of metadata "
        f"DROPPED: would exceed max_input_chars budget ({base_len} + "
        f"{meta_len} > {budget})"
    )
    return ''
class LLMInputTooLargeError(Exception):
    """Error type for LLM input that is too large.

    NOTE(review): meaning taken from the class name; the raising code is not
    visible here — confirm the exact condition against the size check.
    """
@@ -541,7 +578,8 @@ class DiffPrefs:
def build_summary_cache_prompt(effective_prompt: str, max_summary_tokens: int,
prefs: DiffPrefs = None, model: str = '') -> str:
prefs: DiffPrefs = None, model: str = '',
metadata: str = '') -> str:
"""
Compose the full cache-key string passed to save/get_llm_diff_summary.
@@ -553,6 +591,10 @@ def build_summary_cache_prompt(effective_prompt: str, max_summary_tokens: int,
The active model name is folded into the key so switching models
(e.g. qwen3 → gpt-4o) invalidates stale summaries that were generated
by a different model with potentially different phrasing/quality.
`metadata` (the appended structured-data block) is folded in too: two checks can
produce the same text diff but different current metadata, and a stale cached
summary must not be served when the appended facts have changed.
"""
if prefs is None:
prefs = DiffPrefs()
@@ -562,10 +604,12 @@ def build_summary_cache_prompt(effective_prompt: str, max_summary_tokens: int,
+ f'\x00sys:{build_change_summary_system_prompt()}'
+ f'\x00max_tokens:{max_summary_tokens}'
+ f'\x00model:{model}'
+ f'\x00meta:{metadata}'
)
def summarise_change(watch, datastore, diff: str, current_snapshot: str = '') -> str:
def summarise_change(watch, datastore, diff: str, current_snapshot: str = '',
metadata: str = '') -> str:
"""
Generate a plain-language summary of the change using the watch's
llm_change_summary prompt (cascades from tag if not set on watch).
@@ -603,6 +647,7 @@ def summarise_change(watch, datastore, diff: str, current_snapshot: str = '') ->
current_snapshot=current_snapshot,
url=url,
title=title,
metadata=metadata,
)
settings = get_llm_settings(datastore)
@@ -704,12 +749,14 @@ def preview_extract(watch, datastore, content: str) -> dict | None:
# Per-change evaluation
# ---------------------------------------------------------------------------
def evaluate_change(watch, datastore, diff: str, current_snapshot: str = '') -> dict | None:
def evaluate_change(watch, datastore, diff: str, current_snapshot: str = '',
metadata: str = '') -> dict | None:
"""
Evaluate whether `diff` matches the watch's intent.
Returns {'important': bool, 'summary': str} or None if LLM not configured / no intent.
Results are cached by (intent, diff) hash — each unique diff is evaluated exactly once.
Results are cached by (intent, diff, metadata) hash — each unique diff+metadata is
evaluated exactly once. `metadata` is the appended verbatim structured-data block.
"""
cfg = _runtime_llm_config(datastore)
if not cfg:
@@ -725,7 +772,7 @@ def evaluate_change(watch, datastore, diff: str, current_snapshot: str = '') ->
_check_input_size(diff, _get_max_input_chars(datastore))
# Cache lookup — evaluations are deterministic once cached
cache_key = hashlib.sha256(f"{intent}||{diff}".encode()).hexdigest()
cache_key = hashlib.sha256(f"{intent}||{diff}||{metadata}".encode()).hexdigest()
cache = watch.get('llm_evaluation_cache') or {}
if cache_key in cache:
logger.debug(f"LLM cache hit for {watch.get('uuid')} key={cache_key[:8]}")
@@ -758,6 +805,7 @@ def evaluate_change(watch, datastore, diff: str, current_snapshot: str = '') ->
current_snapshot=current_snapshot,
url=url,
title=title,
metadata=metadata,
)
settings = get_llm_settings(datastore)
+11 -2
View File
@@ -41,10 +41,13 @@ def _annotate_moved_lines(diff_text: str) -> str:
def build_eval_prompt(intent: str, diff: str, current_snapshot: str = '',
url: str = '', title: str = '') -> str:
url: str = '', title: str = '', metadata: str = '') -> str:
"""
Build the user message for a diff evaluation call.
The system prompt is kept separate (see build_eval_system_prompt).
`metadata` is verbatim current-state structured data (JSON-LD/OpenGraph) appended
last so the model can compare the diff against canonical current values.
"""
parts = []
@@ -62,6 +65,9 @@ def build_eval_prompt(intent: str, diff: str, current_snapshot: str = '',
parts.append(f"\nWhat changed (diff):\n{diff}")
if metadata:
parts.append(f"\n{metadata}")
return '\n'.join(parts)
@@ -132,7 +138,8 @@ def build_preview_system_prompt() -> str:
def build_change_summary_prompt(diff: str, custom_prompt: str,
current_snapshot: str = '', url: str = '', title: str = '') -> str:
current_snapshot: str = '', url: str = '', title: str = '',
metadata: str = '') -> str:
"""
Build the user message for an AI Change Summary call.
The user supplies their own instructions (custom_prompt); this wraps them
@@ -152,6 +159,8 @@ def build_change_summary_prompt(diff: str, custom_prompt: str,
parts.append(f"Page title: {title}")
parts.append(f"Instructions: {custom_prompt}")
parts.append(f"\nWhat changed (diff):\n{_annotate_moved_lines(diff)}")
if metadata:
parts.append(f"\n{metadata}")
return '\n'.join(parts)
+56 -8
View File
@@ -175,6 +175,30 @@ class ChangeDetectionSpec:
"""
pass
@hookspec
def llm_context_enrich(watch, html_content, datastore):
    """Hook: supply extra current-state context for LLM intent/summary prompts.

    Intended to be called for watches that have an LLM intent or change-summary
    configured, when raw HTML is available. An implementation can expose
    structured facts that the html-to-text snapshot no longer contains (for
    example JSON-LD / OpenGraph product metadata), so that intents such as
    "alert when the SKU changes" or "list the product IDs" can be answered.

    Whatever text is returned is appended to the prompt unmodified. Fitting it
    into the configurable max_input_chars budget is the caller's job (the
    caller drops the enrichment when it does not fit), so implementations
    should NOT apply size limits of their own.

    Args:
        watch: The watch dict being evaluated.
        html_content: Raw HTML of the current page (may be '' / None).
        datastore: The application datastore.

    Returns:
        str or None: The context text to append, or None when there is
        nothing to contribute.
    """
    pass
@hookspec
def get_html_head_extras():
"""Return HTML to inject into the <head> of every page via base.html.
@@ -323,14 +347,17 @@ def register_builtin_restock_plugins():
(restock_diff/__init__.py → model.Watch → content_fetchers → pluggy_interface).
"""
import importlib
module_path = 'changedetectionio.processors.restock_diff.plugins.llm_restock'
try:
module = importlib.import_module(module_path)
if not plugin_manager.is_registered(module):
plugin_manager.register(module, 'llm_restock')
logger.debug("Registered built-in restock plugin: llm_restock")
except Exception as e:
logger.error(f"Failed to register llm_restock plugin: {e}")
for module_path, plugin_name in (
('changedetectionio.processors.restock_diff.plugins.llm_restock', 'llm_restock'),
('changedetectionio.processors.restock_diff.plugins.llm_metadata_enrich', 'llm_metadata_enrich'),
):
try:
module = importlib.import_module(module_path)
if not plugin_manager.is_registered(module):
plugin_manager.register(module, plugin_name)
logger.debug(f"Registered built-in restock plugin: {plugin_name}")
except Exception as e:
logger.error(f"Failed to register {plugin_name} plugin: {e}")
# Helper function to collect UI stats extras from all plugins
def collect_ui_edit_stats_extras(watch):
@@ -403,6 +430,27 @@ def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instanc
return None
def collect_llm_context_enrichment(watch, html_content, datastore):
    """Run the llm_context_enrich hook and merge what the plugins return.

    Each plugin result that is a non-blank string is stripped of surrounding
    whitespace; the survivors are joined with a blank line between them.
    Results that are falsy, not strings, or whitespace-only are ignored.
    If the hook call itself raises, the error is logged at debug level and ''
    is returned. No size limit is applied in this function.

    Args:
        watch: Forwarded to the hook as ``watch``.
        html_content: Forwarded to the hook as ``html_content``.
        datastore: Forwarded to the hook as ``datastore``.

    Returns:
        str: The combined enrichment text, or '' when nothing was contributed.
    """
    try:
        hook_results = plugin_manager.hook.llm_context_enrich(
            watch=watch,
            html_content=html_content,
            datastore=datastore,
        )
    except Exception as err:
        logger.debug(f"llm_context_enrich hook failed: {err}")
        return ''

    contributions = []
    for item in hook_results:
        # Same filter order as before: falsy first, then the type check.
        if not item or not isinstance(item, str):
            continue
        trimmed = item.strip()
        if trimmed:
            contributions.append(trimmed)

    # Joining an empty list already yields ''.
    return '\n\n'.join(contributions)
def get_active_plugins():
"""Get a list of active plugins with their descriptions.
@@ -0,0 +1,32 @@
"""
LLM context enrichment plugin — structured product/page metadata.
Surfaces the page's structured metadata (JSON-LD + OpenGraph site/type) verbatim
so it can be appended to the LLM intent/summary prompts. This lets user intents
and summary prompts reference facts the html-to-text snapshot has stripped out —
prices, SKUs/GTINs, availability, ratings, article dates, page kind, etc.
Extraction reuses the memory-safe pure_python_extractor (stdlib html.parser, no
lxml/libxml2), so it is safe to run on every changed watch without the C-level
memory leak that extruct/lxml carries. It performs NO LLM call of its own and
imposes no size limit — the evaluator enforces the single configurable
max_input_chars budget and drops the enrichment if it would not fit.
"""
from loguru import logger
from changedetectionio.pluggy_interface import hookimpl
@hookimpl
def llm_context_enrich(watch, html_content, datastore):
    """Hook implementation: return the page's structured-metadata text, or None.

    Delegates to extract_metadata_for_llm(html_content). None is returned when
    html_content is empty, when extraction raises (logged at debug level), or
    when the extractor produces an empty result. ``watch`` and ``datastore``
    are accepted to satisfy the hook signature but are not used here.
    """
    if html_content:
        try:
            # Imported at call time rather than at module level, as in the original.
            from changedetectionio.processors.restock_diff.pure_python_extractor import extract_metadata_for_llm
            extracted = extract_metadata_for_llm(html_content)
        except Exception as err:
            logger.debug(f"llm_metadata_enrich: extraction failed: {err}")
        else:
            if extracted:
                return extracted
    return None
@@ -287,3 +287,84 @@ def query_price_availability(extracted_data):
# using something like babel you need to know the locale of the website and even then it can be problematic
# we dont really do anything with the price data so far.. so just accept it the way it comes.
return result
# =============================================================================
# Structured metadata for the LLM enricher — passed through verbatim
# =============================================================================
#
# This surfaces the page's structured metadata (JSON-LD + OpenGraph site/type)
# as-is for the LLM intent/summary prompts. We deliberately do NOT curate,
# cherry-pick fields, or impose a size limit here:
#
# * LLMs are trained on schema.org JSON-LD and read it natively, so handing it
# over verbatim lets ANY user intent ("list the SKUs", "did the release date
# change?", "is it a recipe or a product?") work without us pre-guessing which
# fields matter — and it covers non-product pages (NewsArticle, Event, JobPosting…)
# for free.
# * There is exactly one configurable budget for how much text reaches the LLM —
# max_input_chars (env LLM_MAX_INPUT_CHARS → settings → default), enforced by the
# evaluator. A second hardcoded cap here would be a competing, non-configurable
# source of truth. The caller decides how much fits.
#
# Extraction reuses the memory-safe extract_metadata_pure_python() (stdlib
# html.parser, no lxml/libxml2) so it is safe to call on every changed watch
# without the C-level leak extruct/lxml carries, and it is robust to dangling/
# unclosed <script type="application/ld+json"> blocks (HTMLParser only emits a
# block on a real closing tag, so an unterminated blob is dropped rather than
# swallowing the rest of the document the way a greedy regex would).
# =============================================================================
def extract_metadata_for_llm(html_content) -> str:
    """
    Build the structured-metadata text block handed to the LLM, or '' if there is none.

    The block has up to two sections, each left out when it has no data:

        Page context: site: ExampleShop | og:type: product
        Structured metadata found on the page (JSON-LD):
        {"@type":"Product","name":"Acme Widget","sku":"12345", ...}
        {"@type":"BreadcrumbList", ...}

    Every JSON-LD node is dumped on its own line with compact separators and
    ensure_ascii=False. Nothing is truncated or filtered by this function.

    Returns '' when html_content is empty or when extract_metadata_pure_python()
    raises (the error is logged at debug level). If re-serialising the JSON-LD
    nodes raises TypeError/ValueError, only the JSON-LD section is omitted.
    """
    if not html_content:
        return ''

    try:
        extracted = extract_metadata_pure_python(html_content)
    except Exception as err:
        logger.debug(f"Metadata for LLM: extraction failed: {err}")
        return ''

    sections = []

    # Section 1: OpenGraph site name / type, shown as one "Page context" line.
    opengraph = extracted.get('opengraph', {})
    site_name = opengraph.get('og:site_name')
    page_type = opengraph.get('og:type')
    context_bits = []
    if site_name:
        context_bits.append(f"site: {site_name}")
    if page_type:
        context_bits.append(f"og:type: {page_type}")
    if context_bits:
        sections.append('Page context: ' + ' | '.join(context_bits))

    # Section 2: JSON-LD nodes, one compact JSON document per line.
    jsonld_nodes = extracted.get('json-ld', [])
    if jsonld_nodes:
        try:
            dumped = [
                json.dumps(node, ensure_ascii=False, separators=(',', ':'))
                for node in jsonld_nodes
            ]
            serialised = '\n'.join(dumped)
        except (TypeError, ValueError) as err:
            logger.debug(f"Metadata for LLM: JSON-LD re-serialise failed: {err}")
            serialised = ''
        if serialised:
            sections.append('Structured metadata found on the page (JSON-LD):\n' + serialised)

    return '\n'.join(sections)
@@ -210,7 +210,8 @@ class TestEvaluateChange:
diff = '- $500\n+ $400'
intent = 'flag price drops'
cache_key = hashlib.sha256(f"{intent}||{diff}".encode()).hexdigest()
metadata = '' # no enrichment in this test; folded into the key as a trailing ||
cache_key = hashlib.sha256(f"{intent}||{diff}||{metadata}".encode()).hexdigest()
watch['llm_evaluation_cache'] = {
cache_key: {'important': True, 'summary': 'cached result'}
}
@@ -7,6 +7,7 @@ import pytest
from changedetectionio.llm.prompt_builder import (
build_eval_prompt,
build_eval_system_prompt,
build_change_summary_prompt,
build_setup_prompt,
build_setup_system_prompt,
SNAPSHOT_CONTEXT_CHARS,
@@ -71,6 +72,49 @@ class TestBuildEvalPrompt:
assert len(prompt_without) < len(prompt_with)
class TestMetadataEnrichmentInPrompts:
    """Checks how the ``metadata`` argument affects the eval and change-summary
    user prompts: the block must appear when given and add nothing when absent."""

    METADATA = (
        "Page context: site: ExampleShop | og:type: product\n"
        "Structured metadata found on the page (JSON-LD):\n"
        '{"@type":"Product","name":"Acme Widget","sku":"12345","color":"blue"}'
    )

    def test_eval_prompt_includes_metadata(self):
        rendered = build_eval_prompt(
            intent='alert on SKU change',
            diff='- a\n+ b',
            metadata=self.METADATA,
        )
        assert self.METADATA in rendered
        # Arbitrary fields inside the block must come through untouched.
        for fragment in ('"sku":"12345"', '"color":"blue"'):
            assert fragment in rendered
        # The metadata block is expected after the diff section.
        diff_at = rendered.index('What changed (diff):')
        meta_at = rendered.index('Structured metadata found')
        assert diff_at < meta_at

    def test_eval_prompt_unchanged_without_metadata(self):
        enriched = build_eval_prompt(intent='i', diff='d', metadata=self.METADATA)
        plain = build_eval_prompt(intent='i', diff='d')
        assert 'Structured metadata found' not in plain
        assert len(plain) < len(enriched)

    def test_summary_prompt_includes_metadata(self):
        rendered = build_change_summary_prompt(
            diff='- a\n+ b',
            custom_prompt='list the SKUs',
            metadata=self.METADATA,
        )
        assert self.METADATA in rendered
        assert '"sku":"12345"' in rendered

    def test_summary_prompt_unchanged_without_metadata(self):
        plain = build_change_summary_prompt(diff='- a\n+ b', custom_prompt='x')
        assert 'Structured metadata found' not in plain

    def test_empty_metadata_appends_nothing(self):
        # An empty-string metadata must produce exactly the no-metadata prompt.
        eval_default = build_eval_prompt(intent='i', diff='d')
        eval_empty = build_eval_prompt(intent='i', diff='d', metadata='')
        assert eval_empty == eval_default

        summary_default = build_change_summary_prompt(diff='d', custom_prompt='c')
        summary_empty = build_change_summary_prompt(diff='d', custom_prompt='c', metadata='')
        assert summary_empty == summary_default
class TestBuildEvalSystemPrompt:
def test_returns_string(self):
result = build_eval_system_prompt()
@@ -0,0 +1,100 @@
#!/usr/bin/env python3
# coding=utf-8
"""Unit tests for the memory-safe, verbatim structured-metadata block used by the LLM enricher.
Run: python -m unittest changedetectionio.tests.unit.test_product_metadata_summary
"""
import json
import unittest
from changedetectionio.processors.restock_diff.pure_python_extractor import (
extract_metadata_for_llm,
)
def _page(*scripts, head_extra=''):
    """Wrap the given fragments in a minimal HTML document.

    The positional fragments are joined with newlines and placed in <body>;
    ``head_extra`` is placed inside <head>.
    """
    joined = '\n'.join(scripts)
    return '<html><head>{}</head><body>{}</body></html>'.format(head_extra, joined)
class TestExtractMetadataForLLM(unittest.TestCase):
    """Tests for extract_metadata_for_llm(): JSON-LD pass-through, OpenGraph
    page context, and empty output for malformed or missing metadata."""

    def test_jsonld_passed_through_verbatim(self):
        markup = _page('''
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"Product","name":"Acme Widget",
"sku":"12345","color":"blue","releaseDate":"2026-01-02",
"offers":{"@type":"Offer","price":"249.00","priceCurrency":"USD","availability":"https://schema.org/InStock"}}
</script>''')
        rendered = extract_metadata_for_llm(markup)
        # Every field must be present, including ones nobody selected explicitly.
        expected_fragments = (
            'JSON-LD',
            '"name":"Acme Widget"',
            '"sku":"12345"',
            '"color":"blue"',
            '"releaseDate":"2026-01-02"',
            '"availability":"https://schema.org/InStock"',
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, rendered)

    def test_no_size_or_count_limit_is_imposed(self):
        # 50 products in, 50 products out: the extractor must not cap the list.
        products = [f'{{"@type":"Product","name":"P{i}","sku":"S{i}"}}' for i in range(50)]
        payload = ",".join(products)
        markup = _page(f'<script type="application/ld+json">[{payload}]</script>')
        rendered = extract_metadata_for_llm(markup)
        self.assertIn('"name":"P0"', rendered)
        self.assertIn('"name":"P49"', rendered)
        self.assertNotIn('more products', rendered)  # no truncation marker

    def test_non_product_types_included(self):
        # Non-product schema.org types (news, events, ...) are passed through too.
        markup = _page('''<script type="application/ld+json">
{"@type":"NewsArticle","headline":"Big news","datePublished":"2026-05-30"}
</script>''')
        rendered = extract_metadata_for_llm(markup)
        for fragment in ('"@type":"NewsArticle"', '"headline":"Big news"'):
            self.assertIn(fragment, rendered)

    def test_compact_reserialisation_is_valid_json(self):
        markup = _page('''<script type="application/ld+json">
{ "@type" : "Product" , "name" : "Spaced Out" }
</script>''')
        rendered = extract_metadata_for_llm(markup)
        last_line = rendered.splitlines()[-1]
        # The re-dumped line has to parse back as JSON.
        parsed = json.loads(last_line)
        self.assertEqual(parsed['name'], 'Spaced Out')

    def test_opengraph_page_context(self):
        markup = _page(
            '<script type="application/ld+json">{"@type":"ItemList"}</script>',
            head_extra='''
<meta property="og:site_name" content="ExampleShop">
<meta property="og:type" content="product.group">
''',
        )
        rendered = extract_metadata_for_llm(markup)
        self.assertIn('Page context: site: ExampleShop', rendered)
        self.assertIn('og:type: product.group', rendered)
        self.assertIn('"@type":"ItemList"', rendered)

    def test_dangling_unclosed_jsonld_is_safe(self):
        # An unterminated ld+json block must neither crash nor yield any output.
        markup = (
            '<html><body>'
            '<script type="application/ld+json">{"@type":"Product","name":"Broken","sku":"X"'
            '<div>rest of page</div>'
            '</body></html>'
        )
        rendered = extract_metadata_for_llm(markup)
        self.assertEqual(rendered, '')

    def test_invalid_json_skipped(self):
        markup = _page('<script type="application/ld+json">{not valid json,,}</script>')
        rendered = extract_metadata_for_llm(markup)
        self.assertEqual(rendered, '')

    def test_no_metadata_returns_empty(self):
        for document in ('<html><body><p>hi</p></body></html>', ''):
            self.assertEqual(extract_metadata_for_llm(document), '')
if __name__ == '__main__':
unittest.main()
+12 -5
View File
@@ -450,7 +450,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
try:
from changedetectionio.llm.evaluator import (
evaluate_change, resolve_intent, resolve_llm_field,
summarise_change, _runtime_llm_config,
summarise_change, _runtime_llm_config, compute_llm_enrichment,
)
# _runtime_llm_config returns None (and logs a debug skip
# message) when the master 'llm_enabled' toggle is off, so
@@ -476,14 +476,20 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
else:
_diff_text = contents
# Structured-metadata enrichment (verbatim JSON-LD/OpenGraph) from the
# raw HTML, appended to both the intent and summary prompts. Computed once
# and dropped automatically if it won't fit the max_input_chars budget.
_llm_raw_html = getattr(getattr(update_handler, 'fetcher', None), 'content', '') or ''
_llm_metadata = compute_llm_enrichment(watch, datastore, _llm_raw_html, _diff_text)
# Step 1: AI Change Intent — may suppress notification
_llm_intent, _llm_intent_source = resolve_intent(watch, datastore)
if _llm_intent:
set_watch_minitext_status(watch, "AI/LLM (rules)..")
_llm_result = await loop.run_in_executor(
executor,
lambda diff=_diff_text, snap=contents: evaluate_change(
watch, datastore, diff=diff, current_snapshot=snap
lambda diff=_diff_text, snap=contents, meta=_llm_metadata: evaluate_change(
watch, datastore, diff=diff, current_snapshot=snap, metadata=meta
)
)
update_obj['_llm_result'] = _llm_result
@@ -502,8 +508,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
set_watch_minitext_status(watch, "AI/LLM (summary)..")
_change_summary = await loop.run_in_executor(
executor,
lambda diff=_diff_text, snap=contents: summarise_change(
watch, datastore, diff=diff, current_snapshot=snap
lambda diff=_diff_text, snap=contents, meta=_llm_metadata: summarise_change(
watch, datastore, diff=diff, current_snapshot=snap, metadata=meta
)
)
if _change_summary:
@@ -557,6 +563,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
effective_prompt=get_effective_summary_prompt(watch, datastore),
max_summary_tokens=_llm_max_summary_tokens,
model=_llm_model,
metadata=_llm_metadata,
)
watch.save_llm_diff_summary(
update_obj['_llm_change_summary'],