"""
Unit tests for changedetectionio/llm/evaluator.py
Uses mocked LLM calls — no real API key needed.
"""
import pytest
from unittest.mock import patch, MagicMock
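
# The helpers below build lightweight stand-ins for the real objects: the datastore is a
# MagicMock exposing only .data['settings']['application'], and the watch is a plain dict
# carrying just the keys the evaluator reads, so the tests run without the full application
# or a real LLM backend.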
def _make_datastore(llm_cfg=None, tags=None):
    """Build a minimal datastore-like dict for testing."""
    ds = MagicMock()
    app_settings = {
        'llm': llm_cfg or {},
        'tags': tags or {},
    }
    ds.data = {
        'settings': {
            'application': app_settings,
        }
    }
    return ds

def _make_watch(llm_intent='', llm_change_summary='', tags=None, uuid='test-uuid-1234'):
    """Build a minimal watch-like dict for testing."""
    return {
        'llm_intent': llm_intent,
        'llm_change_summary': llm_change_summary,
        'tags': tags or [],
        'uuid': uuid,
        'url': 'https://example.com',
        'page_title': 'Test Page',
        'llm_evaluation_cache': {},
        'llm_prefilter': None,
    }

# ---------------------------------------------------------------------------
# resolve_intent
# ---------------------------------------------------------------------------
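# Cascade exercised by these tests: a non-empty llm_intent on the watch itself always wins
# (source 'watch'); otherwise a tag with a non-empty llm_intent supplies the intent and its
# title becomes the source. Whitespace-only values count as empty, and tag UUIDs missing
# from the datastore are skipped.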
class TestResolveIntent:

    def test_watch_intent_takes_priority(self):
        from changedetectionio.llm.evaluator import resolve_intent
        tag = {'title': 'mygroup', 'llm_intent': 'group intent'}
        ds = _make_datastore(tags={'tag-1': tag})
        watch = _make_watch(llm_intent='watch intent', tags=['tag-1'])
        intent, source = resolve_intent(watch, ds)
        assert intent == 'watch intent'
        assert source == 'watch'

    def test_tag_intent_used_when_watch_has_none(self):
        from changedetectionio.llm.evaluator import resolve_intent
        tag = {'title': 'pricing-group', 'llm_intent': 'flag price drops'}
        ds = _make_datastore(tags={'tag-1': tag})
        watch = _make_watch(llm_intent='', tags=['tag-1'])
        intent, source = resolve_intent(watch, ds)
        assert intent == 'flag price drops'
        assert source == 'pricing-group'

    def test_no_intent_anywhere_returns_empty(self):
        from changedetectionio.llm.evaluator import resolve_intent
        ds = _make_datastore()
        watch = _make_watch(llm_intent='')
        intent, source = resolve_intent(watch, ds)
        assert intent == ''
        assert source == ''

    def test_tag_applied_to_all_watches_in_group(self):
        """Tag intent propagates to every watch in the tag (no opt-in needed)."""
        from changedetectionio.llm.evaluator import resolve_intent
        tag = {'title': 'job-board', 'llm_intent': 'new engineering jobs'}
        ds = _make_datastore(tags={'tag-1': tag})
        # Three different watches, all in the tag, none have their own intent
        for watch_uuid in ['uuid-A', 'uuid-B', 'uuid-C']:
            watch = _make_watch(llm_intent='', tags=['tag-1'], uuid=watch_uuid)
            intent, source = resolve_intent(watch, ds)
            assert intent == 'new engineering jobs', f"Watch {watch_uuid} should inherit tag intent"
            assert source == 'job-board'

    def test_whitespace_only_intent_treated_as_empty(self):
        from changedetectionio.llm.evaluator import resolve_intent
        ds = _make_datastore()
        watch = _make_watch(llm_intent='   ')
        intent, source = resolve_intent(watch, ds)
        assert intent == ''

    def test_missing_tag_in_datastore_skipped(self):
        from changedetectionio.llm.evaluator import resolve_intent
        ds = _make_datastore(tags={})  # no tags registered
        watch = _make_watch(llm_intent='', tags=['nonexistent-tag'])
        intent, source = resolve_intent(watch, ds)
        assert intent == ''

# ---------------------------------------------------------------------------
# get_llm_config
# ---------------------------------------------------------------------------
class TestGetLlmConfig:

    def test_returns_none_when_no_model(self):
        from changedetectionio.llm.evaluator import get_llm_config
        ds = _make_datastore(llm_cfg={})
        assert get_llm_config(ds) is None

    def test_returns_config_when_model_set(self):
        from changedetectionio.llm.evaluator import get_llm_config
        cfg = {'model': 'gpt-4o-mini', 'api_key': 'sk-test'}
        ds = _make_datastore(llm_cfg=cfg)
        result = get_llm_config(ds)
        assert result['model'] == 'gpt-4o-mini'

    def test_env_var_overrides_datastore(self):
        """LLM_MODEL env var takes priority over datastore settings."""
        from changedetectionio.llm.evaluator import get_llm_config
        ds = _make_datastore(llm_cfg={'model': 'datastore-model'})
        with patch.dict('os.environ', {'LLM_MODEL': 'ollama/llama3.2', 'LLM_API_KEY': '', 'LLM_API_BASE': ''}):
            result = get_llm_config(ds)
            assert result['model'] == 'ollama/llama3.2'

    def test_env_var_api_key_and_base_included(self):
        """LLM_API_KEY and LLM_API_BASE are picked up alongside LLM_MODEL."""
        from changedetectionio.llm.evaluator import get_llm_config
        ds = _make_datastore()
        env = {'LLM_MODEL': 'gpt-4o', 'LLM_API_KEY': 'env-key', 'LLM_API_BASE': 'http://localhost:11434'}
        with patch.dict('os.environ', env):
            result = get_llm_config(ds)
            assert result['api_key'] == 'env-key'
            assert result['api_base'] == 'http://localhost:11434'

    def test_llm_configured_via_env_true_when_model_set(self):
        """llm_configured_via_env() returns True when LLM_MODEL is set."""
        from changedetectionio.llm.evaluator import llm_configured_via_env
        with patch.dict('os.environ', {'LLM_MODEL': 'gpt-4o-mini'}):
            assert llm_configured_via_env() is True

    def test_llm_configured_via_env_false_when_not_set(self):
        """llm_configured_via_env() returns False when LLM_MODEL is absent."""
        from changedetectionio.llm.evaluator import llm_configured_via_env
        env = {k: '' for k in ['LLM_MODEL', 'LLM_API_KEY', 'LLM_API_BASE']}
        with patch.dict('os.environ', env, clear=False):
            # Ensure LLM_MODEL is truly absent
            import os
            os.environ.pop('LLM_MODEL', None)
            assert llm_configured_via_env() is False

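
# For reference, the environment-driven configuration exercised above corresponds to a shell
# setup along the lines of (values taken from the tests; a local backend such as Ollama
# normally needs no API key):
#
#   export LLM_MODEL=ollama/llama3.2
#   export LLM_API_BASE=http://localhost:11434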
# ---------------------------------------------------------------------------
# evaluate_change
# ---------------------------------------------------------------------------
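# Behaviour pinned down by these tests: evaluate_change() returns None when no model is
# configured or no intent resolves, returns {'important': False, 'summary': ''} for an
# empty or whitespace-only diff, serves repeated (intent, diff) pairs from
# watch['llm_evaluation_cache'] keyed by sha256(f"{intent}||{diff}"), and fails open with
# {'important': True, 'summary': ''} when the LLM call raises, so an outage never
# suppresses a notification. Token usage is recorded on the watch after each call.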
class TestEvaluateChange:

    def test_returns_none_when_llm_not_configured(self):
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={})  # no model
        watch = _make_watch(llm_intent='flag price drops')
        result = evaluate_change(watch, ds, diff='- $500\n+ $400')
        assert result is None

    def test_returns_none_when_no_intent(self):
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_intent='')
        result = evaluate_change(watch, ds, diff='some diff')
        assert result is None

    def test_returns_not_important_for_empty_diff(self):
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_intent='flag price drops')
        result = evaluate_change(watch, ds, diff='')
        assert result == {'important': False, 'summary': ''}

    def test_returns_not_important_for_whitespace_diff(self):
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_intent='flag price drops')
        result = evaluate_change(watch, ds, diff='   \n   ')
        assert result == {'important': False, 'summary': ''}

    def test_calls_llm_and_returns_result(self):
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini', 'api_key': 'sk-test'})
        watch = _make_watch(llm_intent='flag price drops')
        llm_response = '{"important": true, "summary": "Price dropped from $500 to $400"}'
        with patch('changedetectionio.llm.client.completion', return_value=(llm_response, 150)):
            result = evaluate_change(watch, ds, diff='- $500\n+ $400')
            assert result['important'] is True
            assert 'Price dropped' in result['summary']

    def test_cache_hit_skips_llm_call(self):
        from changedetectionio.llm.evaluator import evaluate_change
        import hashlib
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini', 'api_key': 'sk-test'})
        watch = _make_watch(llm_intent='flag price drops')
        diff = '- $500\n+ $400'
        intent = 'flag price drops'
        cache_key = hashlib.sha256(f"{intent}||{diff}".encode()).hexdigest()
        watch['llm_evaluation_cache'] = {
            cache_key: {'important': True, 'summary': 'cached result'}
        }
        with patch('changedetectionio.llm.client.completion') as mock_llm:
            result = evaluate_change(watch, ds, diff=diff)
            mock_llm.assert_not_called()
            assert result['summary'] == 'cached result'

    def test_llm_failure_returns_important_true(self):
        """On LLM error, notification should NOT be suppressed (fail open)."""
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini', 'api_key': 'sk-test'})
        watch = _make_watch(llm_intent='flag price drops')
        with patch('changedetectionio.llm.client.completion', side_effect=Exception('API timeout')):
            result = evaluate_change(watch, ds, diff='- $500\n+ $400')
            assert result['important'] is True
            assert result['summary'] == ''

    def test_unimportant_result_from_llm(self):
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_intent='only alert on price drops')
        llm_response = '{"important": false, "summary": "Only a footer copyright year changed"}'
        with patch('changedetectionio.llm.client.completion', return_value=(llm_response, 45)):
            result = evaluate_change(watch, ds, diff='- Copyright 2023\n+ Copyright 2024')
            assert result['important'] is False
            assert 'footer' in result['summary'].lower() or 'copyright' in result['summary'].lower()

    def test_last_tokens_used_stored_after_eval(self):
        """watch['llm_last_tokens_used'] is set to the token count after a successful call."""
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_intent='flag price drops')
        llm_response = '{"important": true, "summary": "Price fell"}'
        with patch('changedetectionio.llm.client.completion', return_value=(llm_response, 123)):
            evaluate_change(watch, ds, diff='- $500\n+ $300')
        assert watch.get('llm_last_tokens_used') == 123

    def test_cumulative_tokens_accumulate_across_evals(self):
        """Each eval adds its tokens to watch['llm_tokens_used_cumulative']."""
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_intent='flag price drops')
        resp1 = '{"important": true, "summary": "First"}'
        resp2 = '{"important": false, "summary": "Second"}'
        with patch('changedetectionio.llm.client.completion', return_value=(resp1, 80)):
            evaluate_change(watch, ds, diff='- $500\n+ $400')
        # Second call needs a different diff to avoid cache hit
        with patch('changedetectionio.llm.client.completion', return_value=(resp2, 60)):
            evaluate_change(watch, ds, diff='- $400\n+ $350')
        assert watch.get('llm_tokens_used_cumulative') == 140

# ---------------------------------------------------------------------------
# Token budget enforcement
# ---------------------------------------------------------------------------
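# Contract exercised below for _check_token_budget(watch, cfg, tokens_this_call):
#   * no limits in cfg                                      -> True
#   * tokens_this_call over cfg['max_tokens_per_check']     -> False
#   * running total over cfg['max_tokens_cumulative']       -> False
#   * tokens_this_call is added to watch['llm_tokens_used_cumulative'], so a
#     pre-flight call with tokens_this_call=0 checks the budget without changing it.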
class TestTokenBudget:

    def test_no_limits_always_returns_true(self):
        """When no limits configured, budget check always passes."""
        from changedetectionio.llm.evaluator import _check_token_budget
        watch = _make_watch()
        cfg = {}  # no limits
        assert _check_token_budget(watch, cfg, tokens_this_call=10_000) is True

    def test_per_check_limit_exceeded_returns_false(self):
        """Tokens on this call exceeding per-check limit → False."""
        from changedetectionio.llm.evaluator import _check_token_budget
        watch = _make_watch()
        cfg = {'max_tokens_per_check': 100}
        result = _check_token_budget(watch, cfg, tokens_this_call=150)
        assert result is False

    def test_per_check_limit_not_exceeded_returns_true(self):
        """Tokens on this call within per-check limit → True."""
        from changedetectionio.llm.evaluator import _check_token_budget
        watch = _make_watch()
        cfg = {'max_tokens_per_check': 200}
        result = _check_token_budget(watch, cfg, tokens_this_call=150)
        assert result is True

    def test_cumulative_limit_exceeded_returns_false(self):
        """Total accumulated tokens exceeding cumulative limit → False."""
        from changedetectionio.llm.evaluator import _check_token_budget
        watch = _make_watch()
        watch['llm_tokens_used_cumulative'] = 900
        cfg = {'max_tokens_cumulative': 1000}
        # This call adds 200 → total 1100 > 1000
        result = _check_token_budget(watch, cfg, tokens_this_call=200)
        assert result is False

    def test_cumulative_limit_not_yet_exceeded_returns_true(self):
        """Total accumulated tokens within cumulative limit → True."""
        from changedetectionio.llm.evaluator import _check_token_budget
        watch = _make_watch()
        watch['llm_tokens_used_cumulative'] = 500
        cfg = {'max_tokens_cumulative': 1000}
        result = _check_token_budget(watch, cfg, tokens_this_call=100)
        assert result is True

    def test_tokens_accumulated_into_watch(self):
        """tokens_this_call is added to watch['llm_tokens_used_cumulative']."""
        from changedetectionio.llm.evaluator import _check_token_budget
        watch = _make_watch()
        watch['llm_tokens_used_cumulative'] = 300
        cfg = {}
        _check_token_budget(watch, cfg, tokens_this_call=75)
        assert watch['llm_tokens_used_cumulative'] == 375

    def test_zero_tokens_call_does_not_change_cumulative(self):
        """Calling with tokens_this_call=0 (pre-flight check) doesn't modify cumulative."""
        from changedetectionio.llm.evaluator import _check_token_budget
        watch = _make_watch()
        watch['llm_tokens_used_cumulative'] = 200
        cfg = {}
        _check_token_budget(watch, cfg, tokens_this_call=0)
        assert watch['llm_tokens_used_cumulative'] == 200

    def test_evaluate_change_skips_call_when_cumulative_over_budget(self):
        """Pre-flight cumulative check: if already over budget, skip LLM call and fail open."""
        from changedetectionio.llm.evaluator import evaluate_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini', 'max_tokens_cumulative': 100})
        watch = _make_watch(llm_intent='flag price drops')
        watch['llm_tokens_used_cumulative'] = 500  # already far over
        with patch('changedetectionio.llm.client.completion') as mock_llm:
            result = evaluate_change(watch, ds, diff='- $500\n+ $400')
            mock_llm.assert_not_called()
            # Fail open: important=True so the notification is NOT suppressed
            assert result == {'important': True, 'summary': ''}

    def test_evaluate_change_per_check_limit_fails_open(self):
        """Per-check token limit exceeded after the call → result still returned (fail open)."""
        from changedetectionio.llm.evaluator import evaluate_change
        # max_tokens_per_check is 50, but the call returns 150 tokens
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini', 'max_tokens_per_check': 50})
        watch = _make_watch(llm_intent='flag price drops')
        llm_response = '{"important": false, "summary": "Only minor change"}'
        with patch('changedetectionio.llm.client.completion', return_value=(llm_response, 150)):
            result = evaluate_change(watch, ds, diff='- $500\n+ $499')
            # LLM said not important, but even with the per-check warning the result is returned
            # (budget warning is logged but the evaluation result is still used)
            assert result is not None
            assert 'important' in result

# ---------------------------------------------------------------------------
# resolve_llm_field (generic cascade)
# ---------------------------------------------------------------------------
class TestResolveLlmField:

    def test_watch_value_takes_priority(self):
        from changedetectionio.llm.evaluator import resolve_llm_field
        tag = {'title': 'mygroup', 'llm_change_summary': 'tag summary prompt'}
        ds = _make_datastore(tags={'tag-1': tag})
        watch = _make_watch(llm_change_summary='watch summary prompt', tags=['tag-1'])
        value, source = resolve_llm_field(watch, ds, 'llm_change_summary')
        assert value == 'watch summary prompt'
        assert source == 'watch'

    def test_tag_value_used_when_watch_empty(self):
        from changedetectionio.llm.evaluator import resolve_llm_field
        tag = {'title': 'events-group', 'llm_change_summary': 'list new events'}
        ds = _make_datastore(tags={'tag-1': tag})
        watch = _make_watch(llm_change_summary='', tags=['tag-1'])
        value, source = resolve_llm_field(watch, ds, 'llm_change_summary')
        assert value == 'list new events'
        assert source == 'events-group'

    def test_returns_empty_when_not_set_anywhere(self):
        from changedetectionio.llm.evaluator import resolve_llm_field
        ds = _make_datastore()
        watch = _make_watch()
        value, source = resolve_llm_field(watch, ds, 'llm_change_summary')
        assert value == ''
        assert source == ''

    def test_works_for_llm_intent_field_too(self):
        """resolve_llm_field is generic — works for llm_intent same as llm_change_summary."""
        from changedetectionio.llm.evaluator import resolve_llm_field
        tag = {'title': 'grp', 'llm_intent': 'flag price drops'}
        ds = _make_datastore(tags={'t1': tag})
        watch = _make_watch(llm_intent='', tags=['t1'])
        value, source = resolve_llm_field(watch, ds, 'llm_intent')
        assert value == 'flag price drops'

# ---------------------------------------------------------------------------
# summarise_change
# ---------------------------------------------------------------------------
class TestSummariseChange:

    def test_returns_empty_when_llm_not_configured(self):
        from changedetectionio.llm.evaluator import summarise_change
        ds = _make_datastore(llm_cfg={})
        watch = _make_watch(llm_change_summary='List what changed')
        result = summarise_change(watch, ds, diff='- old\n+ new')
        assert result == ''

    def test_uses_default_prompt_when_no_summary_prompt(self):
        """When llm_change_summary is empty, falls back to DEFAULT_CHANGE_SUMMARY_PROMPT."""
        from changedetectionio.llm.evaluator import summarise_change, DEFAULT_CHANGE_SUMMARY_PROMPT
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini', 'api_key': 'sk-test'})
        watch = _make_watch(llm_change_summary='')
        with patch('changedetectionio.llm.client.completion',
                   return_value=('A new item was added.', 40)) as mock_llm:
            result = summarise_change(watch, ds, diff='- old\n+ new')
            mock_llm.assert_called_once()
            # Default prompt must appear in the user message
            call_messages = mock_llm.call_args.kwargs['messages']
            user_msg = next(m['content'] for m in call_messages if m['role'] == 'user')
            assert DEFAULT_CHANGE_SUMMARY_PROMPT in user_msg
            assert result == 'A new item was added.'

    def test_returns_empty_when_diff_empty(self):
        from changedetectionio.llm.evaluator import summarise_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_change_summary='List what changed')
        with patch('changedetectionio.llm.client.completion') as mock_llm:
            result = summarise_change(watch, ds, diff='')
            mock_llm.assert_not_called()
            assert result == ''

    def test_calls_llm_and_returns_plain_text(self):
        from changedetectionio.llm.evaluator import summarise_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini', 'api_key': 'sk-test'})
        watch = _make_watch(llm_change_summary='List new events in English')
        with patch('changedetectionio.llm.client.completion',
                   return_value=('3 new events added: Jazz Night, Art Show, Comedy Gig', 80)):
            result = summarise_change(watch, ds, diff='+ Jazz Night\n+ Art Show\n+ Comedy Gig')
            assert 'Jazz Night' in result
            assert 'Art Show' in result

    def test_cascades_from_tag(self):
        """llm_change_summary on a tag propagates to watches in that tag."""
        from changedetectionio.llm.evaluator import summarise_change
        tag = {'title': 'events', 'llm_change_summary': 'Translate events to English'}
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'}, tags={'tag-1': tag})
        watch = _make_watch(llm_change_summary='', tags=['tag-1'])
        with patch('changedetectionio.llm.client.completion',
                   return_value=('New concert added on Friday', 60)):
            result = summarise_change(watch, ds, diff='+ Konzert am Freitag')
            assert result == 'New concert added on Friday'

    def test_llm_failure_raises(self):
        """On LLM error, summarise_change re-raises so callers can surface the error."""
        from changedetectionio.llm.evaluator import summarise_change
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_change_summary='Describe the change')
        with patch('changedetectionio.llm.client.completion', side_effect=Exception('timeout')):
            with pytest.raises(Exception, match='timeout'):
                summarise_change(watch, ds, diff='- old\n+ new')

    def test_uses_higher_token_limit_than_eval(self):
        """summarise_change passes a dynamic max_tokens larger than the eval default (200)."""
        from changedetectionio.llm.evaluator import summarise_change, _summary_max_tokens
        ds = _make_datastore(llm_cfg={'model': 'gpt-4o-mini'})
        watch = _make_watch(llm_change_summary='Describe changes')
        diff = '- old\n+ new'
        with patch('changedetectionio.llm.client.completion',
                   return_value=('Some summary', 100)) as mock_llm:
            summarise_change(watch, ds, diff=diff)
            passed_max_tokens = mock_llm.call_args.kwargs.get('max_tokens')
            assert passed_max_tokens == _summary_max_tokens(diff)
            assert passed_max_tokens >= 400  # always more generous than eval cap of 200

    def test_dynamic_token_cap_scales_with_diff_size(self):
        """Larger diffs produce a higher max_tokens cap, bounded at 3000."""
        from changedetectionio.llm.evaluator import _summary_max_tokens
        assert _summary_max_tokens('x' * 100) == 400      # floor
        assert _summary_max_tokens('x' * 4000) == 1000
        assert _summary_max_tokens('x' * 12000) == 3000   # ceiling
        assert _summary_max_tokens('x' * 99999) == 3000   # never exceeds ceiling

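# Note: the exact scaling inside _summary_max_tokens() is not pinned down by these tests.
# They only fix a 400-token floor, a 3000-token ceiling, and the sample points above; a cap
# such as min(3000, max(400, len(diff) // 4)) would satisfy every assertion, but the real
# implementation may use a different formula.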
# ---------------------------------------------------------------------------
# compute_summary_cache_key / get_effective_summary_prompt
# ---------------------------------------------------------------------------
class TestSummaryCacheKey:

    def test_same_inputs_produce_same_key(self):
        from changedetectionio.llm.evaluator import compute_summary_cache_key
        key1 = compute_summary_cache_key('+ new line', 'describe changes')
        key2 = compute_summary_cache_key('+ new line', 'describe changes')
        assert key1 == key2

    def test_different_diff_produces_different_key(self):
        from changedetectionio.llm.evaluator import compute_summary_cache_key
        key1 = compute_summary_cache_key('+ line A', 'prompt')
        key2 = compute_summary_cache_key('+ line B', 'prompt')
        assert key1 != key2

    def test_different_prompt_produces_different_key(self):
        from changedetectionio.llm.evaluator import compute_summary_cache_key
        key1 = compute_summary_cache_key('diff', 'list changes')
        key2 = compute_summary_cache_key('diff', 'translate to English')
        assert key1 != key2

    def test_key_is_16_hex_chars(self):
        from changedetectionio.llm.evaluator import compute_summary_cache_key
        key = compute_summary_cache_key('diff', 'prompt')
        assert len(key) == 16
        assert all(c in '0123456789abcdef' for c in key)

    def test_get_effective_prompt_returns_custom_when_set(self):
        from changedetectionio.llm.evaluator import get_effective_summary_prompt
        ds = _make_datastore()
        watch = _make_watch(llm_change_summary='My custom prompt')
        assert get_effective_summary_prompt(watch, ds) == 'My custom prompt'

    def test_get_effective_prompt_returns_default_when_empty(self):
        from changedetectionio.llm.evaluator import get_effective_summary_prompt, DEFAULT_CHANGE_SUMMARY_PROMPT
        ds = _make_datastore()
        watch = _make_watch(llm_change_summary='')
        assert get_effective_summary_prompt(watch, ds) == DEFAULT_CHANGE_SUMMARY_PROMPT

    def test_get_effective_prompt_cascades_from_tag(self):
        from changedetectionio.llm.evaluator import get_effective_summary_prompt
        tag = {'title': 'grp', 'llm_change_summary': 'tag-level prompt'}
        ds = _make_datastore(tags={'t1': tag})
        watch = _make_watch(llm_change_summary='', tags=['t1'])
        assert get_effective_summary_prompt(watch, ds) == 'tag-level prompt'
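
# Note: the properties asserted above (deterministic, sensitive to both diff and prompt,
# 16 lowercase hex characters) would be satisfied by e.g. a truncated SHA-256 such as
# hashlib.sha256(f"{diff}||{prompt}".encode()).hexdigest()[:16]; the concrete construction
# inside compute_summary_cache_key() is an implementation detail these tests do not fix.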