Bump actions/checkout from 6 to 7 in the all group

Bumps the all group with 1 update: [actions/checkout](https://github.com/actions/checkout). Updates `actions/checkout` from 6 to 7 - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v6...v7) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '7' dependency-type: direct:production update-type: version-update:semver-major dependency-group: all ... Signed-off-by: dependabot[bot] <support@github.com>
Restock - Threshold change since "first check" was working really as "since last check", update UI, tests, field name.
2026-06-21 16:18:21 +00:00 · 2026-06-19 00:23:12 +00:00 · 2026-06-18 12:01:41 +02:00 · 2026-06-18 10:49:24 +02:00 · 2026-06-15 13:52:28 +02:00 · 2026-06-03 12:07:40 +02:00
258 changed files with 47641 additions and 8131 deletions
@@ -30,7 +30,7 @@ jobs:

    steps:
    - name: Checkout repository
-      uses: actions/checkout@v6
+      uses: actions/checkout@v7

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
@@ -39,7 +39,7 @@ jobs:
    # Or if we are in a tagged release scenario.
    # NOTE(review): the two `${{ }}` fragments below are joined by literal text
    # (` || ` and ` != ''`), so the value is a non-empty string rather than one
    # boolean expression — this looks like it is always truthy and never skips
    # the job. Confirm against the workflow's triggers before folding it into a
    # single `${{ ... || ... }}` expression.
    if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
      - name: Set up Python 3.11
        uses: actions/setup-python@v6
        with:
@@ -7,7 +7,7 @@ jobs:
    runs-on: ubuntu-latest

    steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v7
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
@@ -44,7 +44,7 @@ jobs:
          - platform: linux/arm64
            dockerfile: ./.github/test/Dockerfile-alpine
    steps:
-        - uses: actions/checkout@v6
+        - uses: actions/checkout@v7
        - name: Set up Python 3.11
          uses: actions/setup-python@v6
          with:
@@ -7,12 +7,12 @@ jobs:
  lint-code:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
      - name: Lint with Ruff
        run: |
          pip install ruff
-          # Check for syntax errors and undefined names
-          ruff check . --select E9,F63,F7,F82
+          # Check for syntax errors and undefined names, and gettext misuse
+          ruff check . --select E9,F63,F7,F82,INT
          # Complete check with errors treated as warnings
          ruff check . --exit-zero
      - name: Validate OpenAPI spec
@@ -20,10 +20,93 @@ jobs:
          pip install openapi-spec-validator
          python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"

+  lint-translations:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v7
+      - name: Check .po files with msgfmt
+        run: |
+          # Refresh the package index first: the index baked into the runner
+          # image can be stale, which makes a bare `apt-get install` fail.
+          sudo apt-get update
+          sudo apt-get install -y gettext
+          # Validate every catalog; a msgfmt failure aborts the step.
+          # `IFS=` and `-r` keep paths containing whitespace or backslashes intact.
+          find changedetectionio/translations -name "*.po" | while IFS= read -r f; do
+            echo "Checking $f"
+            msgfmt --check-format -o /dev/null "$f"
+          done
+      - name: Lint .pot template with dennis
+        run: |
+          pip install "$(grep -E '^dennis ?>=' requirements.txt)"
+          dennis-cmd lint --strict changedetectionio/translations/messages.pot
+      - name: Lint .po files with dennis
+        run: |
+          dennis-cmd lint --strict --excluderules=W302 changedetectionio/translations/*/LC_MESSAGES/messages.po
+        # W302 (unchanged) is excluded due to high false-positive rate in this codebase:
+        # many msgstrs intentionally match msgid (units like "AI", "LLM", and proper nouns).
+      - name: Check translation catalog is up-to-date
+        run: |
+          pip install "$(grep -E '^babel==' requirements.txt)"
+          python setup.py extract_messages
+          python setup.py update_catalog
+          python setup.py compile_catalog
+          # Ignore POT-Creation-Date timestamp lines — they change on every extract_messages run
+          if git diff changedetectionio/translations | grep -v 'POT-Creation-Date' | grep -qE '^[+-][^+-]'; then
+            echo "ERROR: Translation catalog is out of sync. Run: python setup.py extract_messages && python setup.py update_catalog && python setup.py compile_catalog"
+            git diff --stat changedetectionio/translations
+            exit 1
+          fi
+
+  lint-template-i18n:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v7
+      - name: Check for fragmented gettext calls in templates
+        run: |
+          python3 << 'PYEOF'
+          import re, sys
+          from pathlib import Path
+
+          # Detects adjacent {{ _(...) }} calls on the same line separated only by HTML
+          # tags, whitespace, or non-translating Jinja2 variables — the anti-pattern of
+          # splitting a single sentence across multiple msgids.
+          # See https://github.com/dgtlmoon/changedetection.io/issues/4074 for background.
+          #
+          # The correct fix is to consolidate fragments into one entire-sentence msgid,
+          # injecting dynamic values via %(name)s kwargs — per the GNU gettext manual
+          # sections "Entire sentences" and "No string concatenation". See PR #4076 for
+          # worked examples of each consolidation pattern.
+          #
+          # BASELINE: this limit reflects pre-existing violations present when this check
+          # was introduced. It must only ever go DOWN. Each time you fix a template, lower
+          # the limit by the number of lines fixed so the improvement is locked in.
+          # When the count reaches 0, replace the baseline check with a hard sys.exit(1).
+          BASELINE_LIMIT = 44
+
+          FRAGMENT_RE = re.compile(
+              r'\{\{[^{}]*\b_\s*\([^)]*\)[^{}]*\}\}'
+              r'(?:\s*(?:<[^>]+>|\{\{(?![^}]*\b_\s*\()[^}]*\}\})\s*)+'
+              r'\{\{[^{}]*\b_\s*\([^)]*\)[^{}]*\}\}'
+          )
+
+          violations = []
+          for f in sorted(Path('changedetectionio').rglob('*.html')):
+              for lineno, line in enumerate(f.read_text().splitlines(), 1):
+                  if FRAGMENT_RE.search(line):
+                      violations.append(f"{f}:{lineno}: {line.strip()[:120]}")
+
+          count = len(violations)
+          print(f"Fragmented i18n calls found: {count} (limit: {BASELINE_LIMIT})")
+          for v in violations:
+              print(v)
+
+          if count > BASELINE_LIMIT:
+              print(f"\nERROR: {count} fragmented gettext calls exceed the baseline of {BASELINE_LIMIT}.")
+              print("Consolidate adjacent _() calls into a single entire-sentence msgid.")
+              print("See https://github.com/dgtlmoon/changedetection.io/issues/4074 for patterns.")
+              sys.exit(1)
+          PYEOF
+
  test-application-3-10:
    # Only run on push to master (including PR merges)
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
-    needs: lint-code
+    needs: [lint-code, lint-translations, lint-template-i18n]
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.10'
@@ -31,7 +114,7 @@ jobs:

  test-application-3-11:
    # Always run
-    needs: lint-code
+    needs: [lint-code, lint-translations, lint-template-i18n]
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.11'
@@ -39,7 +122,7 @@ jobs:
  test-application-3-12:
    # Only run on push to master (including PR merges)
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
-    needs: lint-code
+    needs: [lint-code, lint-translations, lint-template-i18n]
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.12'
@@ -48,7 +131,7 @@ jobs:
  test-application-3-13:
    # Only run on push to master (including PR merges)
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
-    needs: lint-code
+    needs: [lint-code, lint-translations, lint-template-i18n]
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.13'
@@ -57,7 +140,7 @@ jobs:

  test-application-3-14:
    #if: github.event_name == 'push' && github.ref == 'refs/heads/master'
-    needs: lint-code
+    needs: [lint-code, lint-translations, lint-template-i18n]
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.14'
@@ -21,7 +21,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v6
@@ -85,7 +85,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -99,11 +99,7 @@ jobs:

      - name: Run Unit Tests
        run: |
-          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
-          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
-          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
-          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
-          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'         
+          docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/ tests/llm/'

  # Basic pytest tests with ancillary services
  basic-tests:
@@ -113,7 +109,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -185,7 +181,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -227,7 +223,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -267,7 +263,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -292,8 +288,8 @@ jobs:

      - name: Specific tests in built container for Selenium
        run: |
-          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest  --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
-          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest  --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
+          docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
+

  # SMTP tests
  smtp-tests:
@@ -303,7 +299,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -331,7 +327,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -501,7 +497,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -541,7 +537,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -571,7 +567,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -587,6 +583,10 @@ jobs:
        run: |
          docker run  -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s  tests/plugins/test_processor.py::test_check_plugin_processor'

+      - name: Plugin get_html_head_extras hook injects into base.html
+        run: |
+          docker run test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_html_head_extras.py'
+
  # Container startup tests
  container-tests:
    runs-on: ubuntu-latest
@@ -595,7 +595,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -640,7 +640,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7

      - name: Download Docker image artifact
        uses: actions/download-artifact@v8
@@ -694,7 +694,7 @@ jobs:
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
        with:
          fetch-depth: 0  # Fetch all history and tags for upgrade testing

@@ -7,3 +7,19 @@ repos:
        args: [--fix]
      # Format
      - id: ruff-format
+
+  - repo: local
+    hooks:
+      - id: dennis-lint-pot
+        name: dennis lint pot
+        language: system
+        entry: dennis-cmd lint --strict
+        files: ^changedetectionio/translations/messages\.pot$
+        pass_filenames: true
+
+      - id: dennis-lint-po
+        name: dennis lint po
+        language: system
+        entry: dennis-cmd lint --strict --excluderules=W302
+        files: ^changedetectionio/translations/\w+/LC_MESSAGES/messages\.po$
+        pass_filenames: true
@@ -20,10 +20,11 @@ exclude = [
 select = [
    "B", # flake8-bugbear
    "B9",
-    "C", 
+    "C",
    "E", # pycodestyle
    "F", # Pyflakes
    "I", # isort
+    "INT", # flake8-gettext
    "N", # pep8-naming
    "UP", # pyupgrade
    "W", # pycodestyle
@@ -43,6 +44,9 @@ ignore = [
 [lint.mccabe]
 max-complexity = 12

+[lint.flake8-gettext]
+extend-function-names = ["_l", "lazy_gettext", "pgettext", "npgettext"]
+
 [format]
 indent-style = "space"
 quote-style = "preserve"
@@ -22,6 +22,20 @@ Ideal for monitoring price changes, content edits, conditional changes and more.
 - Get started watching and receiving website change notifications straight away.
 - See our [tutorials and how-to page for more inspiration](https://changedetection.io/tutorials) 

+## AI-powered website change detection — smart alerts and plain-language summaries
+
+Stop drowning in noise. Connect any LLM (OpenAI, Gemini, Anthropic, Ollama and more) and go from _"something changed"_ to _"only the thing you care about changed"_.
+
+**AI change detection rules** — write a plain-English intent once: _"notify me only when the price drops below $50"_, _"alert me when the item comes back in stock"_, _"ignore navigation and footer changes"_. The AI evaluates every detected diff against your intent and silently suppresses everything irrelevant. Fewer false positives, zero noise.
+
+**AI change summaries** — instead of staring at a raw diff, your notification reads _"Price dropped from $89.99 to $67.00"_ or _"3 new products added to the listing"_. Works globally or per-watch, with full control over the prompt.
+
+Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with **Ollama**, **vLLM**, **LM Studio**, or any **OpenAI-compatible self-hosted endpoint** — pick the *OpenAI-compatible (vLLM, LM Studio, llama.cpp)* option in the provider dropdown and point it at your server's `/v1` URL. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
+
+[<img src="./docs/LLM-change-summary.jpeg" style="max-width:100%;" alt="AI-powered website change detection — plain language change summaries and smart alert rules"  title="AI website change detection with LLM change summaries and intelligent alert filtering" />](https://changedetection.io?src=github)
+
+_Note: Available in our subscription/hosted service from June 2026_
+
 ### Target specific parts of the webpage using the Visual Selector tool.

 Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)
@@ -308,9 +322,27 @@ I offer commercial support, this software is depended on by network security, ae
 [release-link]: https://github.com/dgtlmoon/changedetection.io/releases
 [docker-link]: https://hub.docker.com/r/dgtlmoon/changedetection.io

-## Commercial Licencing

-If you are reselling this software either in part or full as part of any commercial arrangement, you must abide by our COMMERCIAL_LICENCE.md found in our code repository, please contact dgtlmoon@gmail.com and contact@changedetection.io .
+## Disclaimer
+
+**This software is provided "as-is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the software or the use or other dealings in the software.**
+
+### Website content monitoring
+
+You are solely responsible for ensuring that your use of this software complies with the terms of service, `robots.txt` directives, access policies, and all applicable laws of any website or service you choose to monitor. The authors and contributors of this software accept no liability whatsoever for how you choose to use it or for any consequences arising from that use.
+
+### AI / LLM features
+
+If you choose to enable AI / LLM features, content detected on monitored websites — including page diffs and extracted text — will be transmitted to a third-party AI provider of your choosing, outside of this installation. You are solely responsible for:
+
+- Ensuring such transmission is permitted by the terms of service of every website you monitor.
+- Compliance with all applicable data-protection and privacy laws (including but not limited to GDPR) with respect to any personal data that may appear in monitored content.
+- All API costs and charges levied by your chosen AI provider. This software has no visibility into or control over those charges.
+- Any consequences arising from acting on AI-generated output.
+
+**AI and LLM models are known to hallucinate** — producing plausible-sounding but factually incorrect, incomplete, or entirely fabricated output with apparent confidence. By design, LLMs may also omit or silently truncate relevant information during summarisation. **AI output must never be relied upon as complete or accurate.**
+
+By using this software, and in particular any AI / LLM features, you personally indemnify and hold harmless the author(s), contributor(s), and any associated parties from and against any and all claims, damages, losses, costs, and expenses (including reasonable legal fees) arising out of or in connection with your use of this software.

 ## Third-party licenses

@@ -320,4 +352,6 @@ changedetectionio.html_tools.elementpath_tostring: Copyright (c), 2018-2021, SIS

 Recognition of fantastic contributors to the project

+<sub>Developer note: see [translation guide](changedetectionio/translations/README.md) for i18n template patterns and workflow.</sub>
+
 - Constantin Hong https://github.com/Constantin1489
@@ -1,5 +1,6 @@
 [python: **.py]
-keywords = _:1,_l:1,gettext:1
+keywords = _ _l gettext pgettext:1c,2

 [jinja2: **/templates/**.html]
 encoding = utf-8
+keywords = _ _l gettext pgettext:1c,2
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
 # Semver means never use .01, or 00. Should be .1.
-__version__ = '0.54.7'
+__version__ = '0.55.7'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
@@ -10,6 +10,7 @@ from json.decoder import JSONDecodeError
 from loguru import logger
 import getopt
 import logging
+import os
 import platform
 import signal
 import threading
@@ -399,8 +400,11 @@ def main():
        datastore.data['settings']['application']['all_paused'] = all_paused
        logger.info(f"Setting all watches paused: {all_paused}")

+    # Register built-in restock plugins (deferred here to avoid circular imports at module load time)
+    from changedetectionio.pluggy_interface import inject_datastore_into_plugins, register_builtin_restock_plugins
+    register_builtin_restock_plugins()
+
    # Inject datastore into plugins that need access to settings
-    from changedetectionio.pluggy_interface import inject_datastore_into_plugins
    inject_datastore_into_plugins(datastore)

    # Step 1: Add URLs with their options (if provided via -u flags)
@@ -623,12 +627,14 @@ def main():

    @app.context_processor
    def inject_template_globals():
+        from changedetectionio.llm.evaluator import get_llm_config as _get_llm_config
        return dict(right_sticky="v"+__version__,
                    new_version_available=app.config['NEW_VERSION_AVAILABLE'],
                    has_password=datastore.data['settings']['application']['password'] != False,
                    socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
                    all_paused=datastore.data['settings']['application'].get('all_paused', False),
-                    all_muted=datastore.data['settings']['application'].get('all_muted', False)
+                    all_muted=datastore.data['settings']['application'].get('all_muted', False),
+                    llm_configured=bool(_get_llm_config(datastore)),
                    )

    # Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
@@ -154,10 +154,11 @@ class Import(Resource):
            if extras['processor'] not in available:
                return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400

-        # Validate fetch_backend if provided (legacy API compat — still accepted, stored as-is)
+        # Validate fetch_backend if provided
        if 'fetch_backend' in extras:
            from changedetectionio.content_fetchers import available_fetchers
            available = [f[0] for f in available_fetchers()]
+            # Also allow 'system' and extra_browser_* patterns
            is_valid = (
                extras['fetch_backend'] == 'system' or
                extras['fetch_backend'] in available or
@@ -166,14 +167,6 @@ class Import(Resource):
            if not is_valid:
                return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400

-        # Validate browser_profile if provided
-        if 'browser_profile' in extras:
-            from changedetectionio.model.browser_profile import get_builtin_profiles, RESERVED_MACHINE_NAMES
-            store_profiles = self.datastore.data['settings']['application'].get('browser_profiles', {})
-            known = set(get_builtin_profiles().keys()) | set(store_profiles.keys()) | {'system', None}
-            if extras['browser_profile'] not in known:
-                return f"Invalid browser_profile '{extras['browser_profile']}'. Available: {', '.join(str(k) for k in known)}", 400
-
        # Validate notification_urls if provided
        if 'notification_urls' in extras:
            from wtforms import ValidationError
@@ -7,7 +7,7 @@ import threading
 from flask import request
 from . import auth

-from . import validate_openapi_request
+from . import validate_openapi_request, strip_internal_api_fields


 class Tag(Resource):
@@ -85,10 +85,8 @@ class Tag(Resource):
        # Create clean tag dict without Watch-specific fields
        clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}

-        # fetch_backend is a legacy field superseded by browser_profile — omit from API response
-        clean_tag.pop('fetch_backend', None)
-
-        return clean_tag
+        # Never expose `__`-prefixed transient/internal fields
+        return strip_internal_api_fields(clean_tag)

    @auth.check_token
    @validate_openapi_request('deleteTag')
@@ -116,8 +114,9 @@ class Tag(Resource):
        if not tag:
            abort(404, message='No tag exists with the UUID of {}'.format(uuid))

-        # Make a mutable copy of request.json for modification
-        json_data = dict(request.json)
+        # Make a mutable copy of request.json for modification.
+        # Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
+        json_data = strip_internal_api_fields(dict(request.json))

        # Validate notification_urls if provided
        if 'notification_urls' in json_data:
@@ -165,7 +164,8 @@ class Tag(Resource):
    def post(self):
        """Create a single tag/group."""

-        json_data = request.get_json()
+        # Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
+        json_data = strip_internal_api_fields(request.get_json())
        title = json_data.get("title",'').strip()

        # Validate that only valid fields are provided
@@ -1,4 +1,5 @@
 import os
+import re
 import threading

 from changedetectionio.validate_url import is_safe_valid_url
@@ -12,7 +13,7 @@ from flask_restful import abort, Resource
 from loguru import logger
 import copy

-from . import validate_openapi_request, get_readonly_watch_fields
+from . import validate_openapi_request, get_readonly_watch_fields, strip_internal_api_fields
 from ..notification import valid_notification_formats
 from ..notification.handler import newline_re

@@ -103,12 +104,31 @@ class Watch(Resource):
        # attr .last_changed will check for the last written text snapshot on change
        watch['last_changed'] = watch_obj.last_changed
        watch['viewed'] = watch_obj.viewed
-        watch['link'] = watch_obj.link,
+        watch['link'] = watch_obj.link

-        # fetch_backend is a legacy field superseded by browser_profile — omit from API response
-        watch.pop('fetch_backend', None)
+        # Resolved processor config: tag override wins over watch-level config (mirrors restock processor logic)
+        import json
+        _restock_path = os.path.join(watch_obj.data_dir, 'restock_diff.json') if watch_obj.data_dir else None
+        restock_config = {}
+        if _restock_path and os.path.isfile(_restock_path):
+            try:
+                with open(_restock_path, 'r', encoding='utf-8') as _f:
+                    restock_config = json.load(_f).get('restock_diff') or {}
+            except (json.JSONDecodeError, IOError) as e:
+                logger.warning(f"Failed to read restock_diff.json for watch {uuid}: {e}")
+        restock_source = 'watch'
+        tags = self.datastore.data['settings']['application'].get('tags', {})
+        for tag_uuid in (watch_obj.get('tags') or []):
+            tag = tags.get(tag_uuid, {})
+            if tag.get('overrides_watch'):
+                restock_config = dict(tag.get('processor_config_restock_diff') or {})
+                restock_source = f'tag:{tag_uuid}'
+                break
+        watch['processor_config_restock_diff'] = restock_config
+        watch['processor_config_restock_diff_source'] = restock_source

-        return watch
+        # Never expose `__`-prefixed transient/internal fields (e.g. __check_status)
+        return strip_internal_api_fields(watch)

    @auth.check_token
    @validate_openapi_request('deleteWatch')
@@ -169,8 +189,10 @@ class Watch(Resource):
        # Handle processor-config-* fields separately (save to JSON, not datastore)
        from changedetectionio import processors

-        # Make a mutable copy of request.json for modification
-        json_data = dict(request.json)
+        # Make a mutable copy of request.json for modification.
+        # Silently discard `__`-prefixed transient/internal keys — they are not part of the
+        # public schema and must never be writable (e.g. clients that round-trip GET → PUT).
+        json_data = strip_internal_api_fields(dict(request.json))

        # Extract and remove processor config fields from json_data
        processor_config_data = processors.extract_processor_config_from_form_data(json_data)
@@ -257,8 +279,28 @@ class WatchSingleHistory(Resource):
        if request.args.get('html'):
            content = watch.get_fetched_html(timestamp)
            if content:
+                # XSS mitigation (GHSA-cgj8-g98g-4p9x): this is an API endpoint, not a
+                # browser-rendered view. The bytes ARE HTML (that's what the caller asked
+                # for) but a programmatic client doesn't need text/html — and serving
+                # text/html lets attacker-planted <script> in a monitored site execute
+                # in our origin if someone opens the URL in a browser.
+                #
+                # text/plain + explicit utf-8 + nosniff = browser shows inert text,
+                # sniffing can't re-classify it as HTML, an absent charset can't be
+                # auto-detected as UTF-7 (an alternative XSS vector). API clients
+                # still get the raw bytes — they don't care about Content-Type.
                response = make_response(content, 200)
-                response.mimetype = "text/html"
+                response.headers['Content-Type'] = 'text/plain; charset=utf-8'
+                response.headers['X-Content-Type-Options'] = 'nosniff'
+                # Include the timestamp in the download name so downloading multiple
+                # snapshots doesn't collide. No extension — the stored bytes are
+                # "whatever the fetcher captured" (HTML, JSON, XML, text…), so
+                # claiming .html on the download would be a false content-type label
+                # for non-HTML watches. The user/curl can rename if needed.
+                # Strip to safe filename chars (timestamp is already validated as a
+                # watch.history key — this is defense in depth against header injection).
+                safe_ts = re.sub(r'[^0-9A-Za-z_-]', '', str(timestamp))[:32] or 'snapshot'
+                response.headers['Content-Disposition'] = f'attachment; filename="snapshot-{safe_ts}"'
            else:
                response = make_response("No content found", 404)
                response.mimetype = "text/plain"
@@ -425,7 +467,8 @@ class CreateWatch(Resource):
    def post(self):
        """Create a single watch."""

-        json_data = request.get_json()
+        # Silently discard `__`-prefixed transient/internal keys (not part of the public schema).
+        json_data = strip_internal_api_fields(request.get_json())
        url = json_data['url'].strip()

        if not is_safe_valid_url(url):
@@ -133,6 +133,43 @@ def get_tag_schema_properties():
    """
    return _resolve_schema_properties('Tag')

+def strip_private_keys(data):
+    """
+    Remove `__`-prefixed keys from a watch/tag dict at the API boundary.
+
+    These are transient in-memory fields (e.g. `__check_status` set by the worker to
+    surface "Fetching page..." in the UI) and are not part of the public OpenAPI
+    contract. They must never appear in GET responses (otherwise a client that
+    round-trips GET → PUT trips the unknown-field validator), and must be silently
+    discarded from incoming PUT/POST payloads.
+
+    Returns a new dict; the input is not mutated. Non-dict input is returned
+    as-is (the same object, not a copy).
+    """
+    # Anything that is not a dict is passed straight through untouched.
+    if not isinstance(data, dict):
+        return data
+    # Shallow filter: only top-level keys are inspected, values (including nested
+    # dicts) are carried over unchanged, and non-string keys are always kept.
+    return {k: v for k, v in data.items() if not (isinstance(k, str) and k.startswith('__'))}
+
+
+def strip_internal_api_fields(data):
+    """
+    Strip both `__`-prefixed keys AND system-managed fields that aren't in the public
+    OpenAPI spec (skip-cache hashes, LLM runtime state, processor-set status, etc.).
+
+    Use this at every public API boundary so GET responses and PUT/POST payloads agree
+    on what's part of the contract. The set of system-managed fields lives in
+    model/schema_utils.py:SYSTEM_MANAGED_NON_SPEC_FIELDS — extend it there, not here.
+
+    Returns a new dict; the input is not mutated. Non-dict input is returned
+    as-is (the same object, not a copy).
+    """
+    # Anything that is not a dict is passed straight through untouched.
+    if not isinstance(data, dict):
+        return data
+    # NOTE(review): imported at call time rather than at module level, presumably
+    # to avoid a circular import — confirm.
+    from changedetectionio.model.schema_utils import SYSTEM_MANAGED_NON_SPEC_FIELDS
+    # Shallow filter: only top-level string keys are tested (against the `__` prefix
+    # and the system-managed set); non-string keys and all values are kept as they are.
+    return {
+        k: v for k, v in data.items()
+        if not (isinstance(k, str) and (k.startswith('__') or k in SYSTEM_MANAGED_NON_SPEC_FIELDS))
+    }
+
+
 def validate_openapi_request(operation_id):
    """Decorator to validate incoming requests against OpenAPI spec."""
    def decorator(f):
@@ -3,6 +3,16 @@ from functools import wraps
 from flask import current_app, redirect, request
 from loguru import logger

+# Endpoints exempt from auth when `shared_diff_access` is enabled.
+# Must be exact endpoint names — substring matching (GHSA-vwgh-2hvh-4xm5)
+# let the state-changing `/diff/<uuid>/extract` endpoints slip through
+# because their names share the `diff_history_page` prefix.
+SHARED_DIFF_READ_ONLY_ENDPOINTS = frozenset({
+    'ui.ui_diff.diff_history_page',
+    'ui.ui_diff.processor_asset',
+    'ui.ui_diff.download_patch',
+})
+
 def login_optionally_required(func):
    """
    If password authentication is enabled, verify the user is logged in.
@@ -20,7 +30,7 @@ def login_optionally_required(func):
        has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)

        # Permitted
-        if request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
+        if request.endpoint in SHARED_DIFF_READ_ONLY_ENDPOINTS and datastore.data['settings']['application'].get('shared_diff_access'):
            return func(*args, **kwargs)
        elif request.method in flask_login.config.EXEMPT_METHODS:
            return func(*args, **kwargs)
@@ -98,8 +98,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
    backup_threads = []

-    @login_optionally_required
    @backups_blueprint.route("/request-backup", methods=['GET'])
+    @login_optionally_required
    def request_backup():
        if any(thread.is_alive() for thread in backup_threads):
            flash(gettext("A backup is already running, check back in a few minutes"), "error")
@@ -141,8 +141,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):

        return backup_info

-    @login_optionally_required
    @backups_blueprint.route("/download/<string:filename>", methods=['GET'])
+    @login_optionally_required
    def download_backup(filename):
        import re
        filename = filename.strip()
@@ -165,9 +165,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        logger.debug(f"Backup download request for '{full_path}'")
        return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)

-    @login_optionally_required
    @backups_blueprint.route("/", methods=['GET'])
    @backups_blueprint.route("/create", methods=['GET'])
+    @login_optionally_required
    def create():
        backups = find_backups()
        output = render_template("backup_create.html",
@@ -176,8 +176,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                 )
        return output

-    @login_optionally_required
    @backups_blueprint.route("/remove-backups", methods=['GET'])
+    @login_optionally_required
    def remove_backups():

        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
@@ -174,8 +174,8 @@ def construct_restore_blueprint(datastore):
    restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
    restore_threads = []

-    @login_optionally_required
    @restore_blueprint.route("/restore", methods=['GET'])
+    @login_optionally_required
    def restore():
        form = RestoreForm()
        return render_template("backup_restore.html",
@@ -184,8 +184,8 @@ def construct_restore_blueprint(datastore):
                               max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
                               max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))

-    @login_optionally_required
    @restore_blueprint.route("/restore/start", methods=['POST'])
+    @login_optionally_required
    def backups_restore_start():
        if any(t.is_alive() for t in restore_threads):
            flash(gettext("A restore is already running, check back in a few minutes"), "error")
@@ -20,8 +20,7 @@
                <p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
                <p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
                <p class="pure-form-message">
-                    {{ _('Max upload size: %(upload)s MB &nbsp;·&nbsp; Max decompressed size: %(decomp)s MB',
-                         upload=max_upload_mb, decomp=max_decompressed_mb) }}
+                    {{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
                </p>

                <form class="pure-form pure-form-stacked settings"
@@ -208,23 +208,28 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        browsersteps_start_session = {'start_time': time.time()}

        # Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
-        proxy_url = datastore.get_proxy_url_for_watch(uuid=watch_uuid)
+        proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
        proxy = None
-        if proxy_url:
-            from urllib.parse import urlparse
-            parsed = urlparse(proxy_url)
-            proxy = {'server': proxy_url}
-            if parsed.username:
-                proxy['username'] = parsed.username
-            if parsed.password:
-                proxy['password'] = parsed.password
-            logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
+        if proxy_id:
+            proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
+            if proxy_url:
+                from urllib.parse import urlparse
+                parsed = urlparse(proxy_url)
+                proxy = {'server': proxy_url}
+                if parsed.username:
+                    proxy['username'] = parsed.username
+                if parsed.password:
+                    proxy['password'] = parsed.password
+                logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")

        # Resolve the fetcher class for this watch so we can ask it to launch its own browser
        # if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
        watch = datastore.data['watching'][watch_uuid]
        from changedetectionio import content_fetchers
-        fetcher_class = content_fetchers.get_fetcher(watch.effective_browser_profile.fetch_backend)
+        fetcher_name = watch.get_fetch_backend or 'system'
+        if fetcher_name == 'system':
+            fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
+        fetcher_class = getattr(content_fetchers, fetcher_name, None)

        browser = None
        playwright_context = None
@@ -236,7 +241,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
            if result is not None:
                browser, playwright_context = result
-                logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_class.__name__}'")
+                logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")

        # Default: connect to the remote Playwright/sockpuppetbrowser via CDP
        if browser is None:
@@ -263,8 +268,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        return browsersteps_start_session


-    @login_optionally_required
    @browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
+    @login_optionally_required
    def browsersteps_start_session():
        # A new session was requested, return sessionID
        import uuid
@@ -299,8 +304,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        logger.debug("Starting connection with playwright - done")
        return {'browsersteps_session_id': browsersteps_session_id}

-    @login_optionally_required
    @browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
+    @login_optionally_required
    def browser_steps_fetch_screenshot_image():
        from flask import (
            make_response,
@@ -325,8 +330,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)

    # A request for an action was received
-    @login_optionally_required
    @browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
+    @login_optionally_required
    def browsersteps_ui_update():
        import base64

@@ -46,8 +46,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                                                 watch_uuid=uuid
                                                                 )

-            update_handler.preferred_proxy_override = preferred_proxy
-            asyncio.run(update_handler.call_browser())
+            asyncio.run(update_handler.call_browser(preferred_proxy_id=preferred_proxy))
        # title, size is len contents not len xfer
        except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
@@ -75,7 +75,7 @@ class import_url_list(Importer):
                self.remaining_data = []
            self.remaining_data.append(url)

-        flash(gettext("{} Imported from list in {:.2f}s, {} Skipped.").format(good, time.time() - now, len(self.remaining_data)))
+        flash(gettext("{count} Imported from list in {duration}s, {skipped_count} Skipped.").format(count=good, duration=f"{time.time() - now:.2f}", skipped_count=len(self.remaining_data)))


 class import_distill_io_json(Importer):
@@ -136,7 +136,7 @@ class import_distill_io_json(Importer):
                    self.new_uuids.append(new_uuid)
                    good += 1

-        flash(gettext("{} Imported from Distill.io in {:.2f}s, {} Skipped.").format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
+        flash(gettext("{count} Imported from Distill.io in {duration}s, {skipped_count} Skipped.").format(count=len(self.new_uuids), duration=f"{time.time() - now:.2f}", skipped_count=len(self.remaining_data)))


 class import_xlsx_wachete(Importer):
@@ -160,8 +160,7 @@ class import_xlsx_wachete(Importer):
            flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error')
            return

-        row_id = 2
-        for row in wb.active.iter_rows(min_row=row_id):
+        for row_id, row in enumerate(wb.active.iter_rows(min_row=2), start=2):
            try:
                extras = {}
                data = {}
@@ -175,9 +174,9 @@ class import_xlsx_wachete(Importer):
                dynamic_wachet = str(data.get('dynamic wachet', '')).strip().lower()  # Convert bool to str to cover all cases
                # libreoffice and others can have it as =FALSE() =TRUE(), or bool(true)
                if 'true' in dynamic_wachet or dynamic_wachet == '1':
-                    extras['browser_profile'] = 'browser_chromeplaywright'
+                    extras['fetch_backend'] = 'html_webdriver'
                elif 'false' in dynamic_wachet or dynamic_wachet == '0':
-                    extras['browser_profile'] = 'direct_http_requests'
+                    extras['fetch_backend'] = 'html_requests'

                if data.get('xpath'):
                    # @todo split by || ?
@@ -212,10 +211,8 @@ class import_xlsx_wachete(Importer):
            except Exception as e:
                logger.error(e)
                flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_id), 'error')
-            else:
-                row_id += 1

-        flash(gettext("{} imported from Wachete .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
+        flash(gettext("{count} imported from Wachete .xlsx in {duration}s").format(count=len(self.new_uuids), duration=f"{time.time() - now:.2f}"))


 class import_xlsx_custom(Importer):
@@ -241,10 +238,10 @@ class import_xlsx_custom(Importer):

        # @todo check at least 2 rows, same in other method
        from changedetectionio.forms import validate_url
-        row_i = 1
+        row_i = 0

        try:
-            for row in wb.active.iter_rows():
+            for row_i, row in enumerate(wb.active.iter_rows(), start=1):
                url = None
                tags = None
                extras = {}
@@ -295,7 +292,5 @@ class import_xlsx_custom(Importer):
        except Exception as e:
            logger.error(e)
            flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_i), 'error')
-        else:
-            row_i += 1

-        flash(gettext("{} imported from custom .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
+        flash(gettext("{count} imported from custom .xlsx in {duration}s").format(count=len(self.new_uuids), duration=f"{time.time() - now:.2f}"))
@@ -7,8 +7,9 @@
    <div class="tabs collapsable">
        <ul>
            <li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
-            <li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
+            <li class="tab"><a href="#distill-io">Distill.io</a></li>
            <li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
+            <li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
        </ul>
    </div>

@@ -24,7 +25,7 @@
                <div class="pure-control-group">
                        {{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
                        <br>
-                        <p><strong>{{ _('Example:') }}  </strong><code>https://example.com tag1, tag2, last tag</code></p>
+                        <p><strong>{{ _('Example') }}:  </strong><code>https://example.com tag1, tag2, last tag</code></p>
                        {{ _('URLs which do not pass validation will stay in the textarea.') }}
                </div>
                {{ render_field(form.processor, class="processor") }}
@@ -44,7 +45,8 @@
            <div class="tab-pane-inner" id="distill-io">
                    <div class="pure-control-group">
                        {{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
-                        {{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
+                        {# TRANSLATORS: CJK fonts lack native italics; allow substitution with conventional local styling. dennis-ignore: W303 #}
+                        {{ _('This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.')|safe }}
                        <br>
                        <p>
                        {{ _('How to export?') }} <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
@@ -102,7 +104,7 @@
                            {% for n in range(4) %}
                                <td><select name="custom_xlsx[col_type_{{n}}]">
                                    <option value="" style="color: #aaa"> -- {{ _('none') }} --</option>
-                                    <option value="url">{{ _('URL') }}</option>
+                                    <option value="url">URL</option>
                                    <option value="title">{{ _('Title') }}</option>
                                    <option value="include_filters">{{ _('CSS/xPath filter') }}</option>
                                    <option value="tag">{{ _('Group / Tag name(s)') }}</option>
@@ -10,13 +10,15 @@ from flask_babel import gettext

 from changedetectionio.store import ChangeDetectionStore
 from changedetectionio.auth_decorator import login_optionally_required
+from changedetectionio.model.LLMSettings import LLMSettings


 def construct_blueprint(datastore: ChangeDetectionStore):
+    from changedetectionio.llm.evaluator import is_llm_features_disabled
    settings_blueprint = Blueprint('settings', __name__, template_folder="templates")
-
-    from changedetectionio.blueprint.settings.browser_profile import construct_blueprint as construct_browser_profile_blueprint
-    settings_blueprint.register_blueprint(construct_browser_profile_blueprint(datastore), url_prefix='/browsers')
+    if not is_llm_features_disabled():
+        from changedetectionio.blueprint.settings.llm import construct_llm_blueprint
+        settings_blueprint.register_blueprint(construct_llm_blueprint(datastore), url_prefix='/llm')

    @settings_blueprint.route("", methods=['GET', "POST"])
    @login_optionally_required
@@ -30,6 +32,14 @@ def construct_blueprint(datastore: ChangeDetectionStore):


        default = deepcopy(datastore.data['settings'])
+
+        # api_key is intentionally blanked on GET — PasswordField never re-renders
+        # its value, and a blank submission preserves the stored key.
+        default['llm'] = LLMSettings.model_validate(
+            datastore.data['settings']['application'].get('llm') or {}
+        ).model_dump()
+        default['llm']['api_key'] = ''
+
        if datastore.proxy_list is not None:
            available_proxies = list(datastore.proxy_list.keys())
            # When enabled
@@ -79,6 +89,44 @@ def construct_blueprint(datastore: ChangeDetectionStore):

                datastore.data['settings']['application'].update(app_update)

+                # LLM config lives under settings.application.llm.* (post update_31).
+                # Hydrate the stored dict into LLMSettings, then merge form input over it.
+                # WTForms field names match LLMSettings field names exactly, so both sides
+                # of the merge use the same key shape.
+                existing_llm = LLMSettings.model_validate(
+                    datastore.data['settings']['application'].get('llm') or {}
+                )
+
+                llm_form_input = dict(form.data.get('llm') or {})
+
+                # Empty IntegerField submissions come back as None from WTForms;
+                # the schema declares those fields as strict `int`, so passing
+                # them through would fail validation. Treat None like the
+                # absent-key case: keep the stored value, don't merge.
+                llm_form_input = {k: v for k, v in llm_form_input.items() if v is not None}
+
+                # PasswordField never re-renders, so a blank submitted value means
+                # "keep stored key" — drop it from the merge.
+                if not (llm_form_input.get('api_key') or '').strip():
+                    llm_form_input.pop('api_key', None)
+
+                # Env-var overrides make these fields read-only in the UI — ignore form input.
+                if os.getenv('LLM_TOKEN_BUDGET_MONTH', '').strip():
+                    llm_form_input.pop('token_budget_month', None)
+                if os.getenv('LLM_MAX_INPUT_CHARS', '').strip():
+                    llm_form_input.pop('max_input_chars', None)
+
+                # System-managed counters must never come from the form.
+                for protected in LLMSettings.PROTECTED_FIELDS:
+                    llm_form_input.pop(protected, None)
+
+                merged = LLMSettings.model_validate({**existing_llm.model_dump(), **llm_form_input})
+
+                # Clearing the model field strips only the provider-connection fields.
+                # User toggles, budgets, prompts and system counters survive (matches /llm/clear).
+                exclude = set(LLMSettings.CONNECTION_FIELDS) if not merged.model.strip() else None
+                datastore.data['settings']['application']['llm'] = merged.model_dump(exclude=exclude)
+
                # Handle dynamic worker count adjustment
                old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
                new_worker_count = form.data['requests'].get('workers', 1)
@@ -98,8 +146,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                    # Check CPU core availability and warn if worker count is high
                    cpu_count = os.cpu_count()
                    if cpu_count and new_worker_count >= (cpu_count * 0.9):
-                        flash(gettext("Warning: Worker count ({}) is close to or exceeds available CPU cores ({})").format(
-                            new_worker_count, cpu_count), 'warning')
+                        flash(gettext("Warning: Worker count ({worker_count}) is close to or exceeds available CPU cores ({cpu_count})").format(
+                            worker_count=new_worker_count, cpu_count=cpu_count), 'warning')

                    result = worker_pool.adjust_async_worker_count(
                        new_count=new_worker_count,
@@ -167,9 +215,34 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            # Instantiate the form with existing settings
            plugin_forms[plugin_id] = form_class(data=settings)

+        from changedetectionio.llm.evaluator import (
+            get_llm_config as _get_llm_cfg,
+            llm_configured_via_env,
+            get_global_token_budget_month,
+        )
+        llm_config = _get_llm_cfg(datastore) or {}
+        llm_env_configured = llm_configured_via_env()
+        llm_stored = datastore.data['settings']['application'].get('llm') or {}
+        llm_token_budget_month = get_global_token_budget_month(datastore)
+        llm_token_budget_month_env = get_global_token_budget_month()  # env var only, for readonly logic
+        _max_input_chars_env_str = os.getenv('LLM_MAX_INPUT_CHARS', '').strip()
+        llm_max_input_chars_env = int(_max_input_chars_env_str) if _max_input_chars_env_str.isdigit() else 0
+        from changedetectionio.llm.evaluator import _get_max_input_chars, _DEFAULT_MAX_INPUT_CHARS
+        llm_effective_max_input_chars = _get_max_input_chars(datastore)
+        # Cost display: only when user configured their own key (not hosted/operator-managed)
+        llm_show_costs = not llm_env_configured
+
        output = render_template("settings.html",
                                active_plugins=active_plugins,
                                api_key=datastore.data['settings']['application'].get('api_access_token'),
+                                llm_config=llm_config,
+                                llm_env_configured=llm_env_configured,
+                                llm_stored=llm_stored,
+                                llm_token_budget_month=llm_token_budget_month,
+                                llm_token_budget_month_env=llm_token_budget_month_env,
+                                llm_max_input_chars_env=llm_max_input_chars_env,
+                                llm_effective_max_input_chars=llm_effective_max_input_chars,
+                                llm_show_costs=llm_show_costs,
                                python_version=python_version,
                                uptime_seconds=uptime_seconds,
                                available_timezones=sorted(available_timezones()),
@@ -1,200 +0,0 @@
-import flask_login
-from flask import Blueprint, render_template, request, redirect, url_for, flash
-from flask_babel import gettext
-
-from changedetectionio.store import ChangeDetectionStore
-from changedetectionio.auth_decorator import login_optionally_required
-
-
-def construct_blueprint(datastore: ChangeDetectionStore):
-    settings_browser_profile_blueprint = Blueprint(
-        'settings_browsers',
-        __name__,
-        template_folder="templates"
-    )
-
-    def _render_index(browser_profile_form=None, editing_machine_name=None):
-        from changedetectionio import forms
-        from changedetectionio import content_fetchers as cf
-        from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
-
-        # Only browser-capable fetchers are valid profile types
-        fetcher_choices = cf.available_browser_fetchers()
-        if browser_profile_form is None:
-            browser_profile_form = forms.BrowserProfileForm()
-        browser_profile_form.fetch_backend.choices = fetcher_choices
-
-        fetcher_supports_screenshots = {name: True for name, _ in fetcher_choices}
-        fetcher_requires_connection_url = {name: True for name, cls in cf.FETCHERS.items()
-                                           if getattr(cls, 'requires_connection_url', False)}
-
-        # Table shows default built-in profiles first, then user-created profiles
-        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
-        user_profiles = dict(cf.DEFAULT_BROWSER_PROFILES)
-        for machine_name, raw in store_profiles.items():
-            try:
-                user_profiles[machine_name] = BrowserProfile(**raw) if isinstance(raw, dict) else raw
-            except Exception:
-                pass
-
-        current_default = datastore.data['settings']['application'].get('browser_profile') or 'direct_http_requests'
-
-        return render_template(
-            "browser_profiles.html",
-            browser_profiles=user_profiles,
-            browser_profile_form=browser_profile_form,
-            reserved_browser_profile_names=RESERVED_MACHINE_NAMES,
-            fetcher_choices=fetcher_choices,
-            fetcher_supports_screenshots=fetcher_supports_screenshots,
-            fetcher_requires_connection_url=fetcher_requires_connection_url,
-            current_default_profile=current_default,
-            editing_machine_name=editing_machine_name,
-        )
-
-    @settings_browser_profile_blueprint.route("", methods=['GET'])
-    @login_optionally_required
-    def index():
-        return _render_index()
-
-    @settings_browser_profile_blueprint.route("/<string:machine_name>/edit", methods=['GET'])
-    @login_optionally_required
-    def edit(machine_name):
-        from changedetectionio import forms
-        from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
-
-        if machine_name in RESERVED_MACHINE_NAMES:
-            flash(gettext("Built-in browser profiles cannot be edited."), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
-        raw = store_profiles.get(machine_name)
-        if raw is None:
-            flash(gettext("Browser profile not found."), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        profile = BrowserProfile(**raw) if isinstance(raw, dict) else raw
-        form = forms.BrowserProfileForm(data=profile.model_dump())
-        return _render_index(browser_profile_form=form, editing_machine_name=machine_name)
-
-    @settings_browser_profile_blueprint.route("/save", methods=['POST'])
-    @login_optionally_required
-    def save():
-        from changedetectionio import forms
-        from changedetectionio import content_fetchers as cf
-        from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
-
-        fetcher_choices = [(name, desc) for name, desc in cf.available_fetchers()]
-        browser_profile_form = forms.BrowserProfileForm(formdata=request.form)
-        browser_profile_form.fetch_backend.choices = fetcher_choices
-
-        if not browser_profile_form.validate():
-            flash(gettext("Browser profile error: {}").format(
-                '; '.join(str(e) for errs in browser_profile_form.errors.values() for e in errs)
-            ), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        name = browser_profile_form.name.data.strip()
-        machine_name = BrowserProfile.machine_name_from_str(name)
-
-        if machine_name in RESERVED_MACHINE_NAMES:
-            flash(gettext("Cannot use reserved profile name '{}'. Please choose a different name.").format(name), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        original_machine_name = request.form.get('original_machine_name', '').strip()
-        store_profiles = datastore.data['settings']['application'].setdefault('browser_profiles', {})
-
-        if machine_name != original_machine_name and machine_name in store_profiles:
-            flash(gettext("A browser profile named '{}' already exists.").format(name), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        profile_data = {
-            'name': name,
-            'fetch_backend': browser_profile_form.fetch_backend.data,
-            'browser_connection_url': browser_profile_form.browser_connection_url.data or None,
-            'viewport_width': browser_profile_form.viewport_width.data or 1280,
-            'viewport_height': browser_profile_form.viewport_height.data or 1000,
-            'block_images': bool(browser_profile_form.block_images.data),
-            'block_fonts': bool(browser_profile_form.block_fonts.data),
-            'ignore_https_errors': bool(browser_profile_form.ignore_https_errors.data),
-            'user_agent': browser_profile_form.user_agent.data or None,
-            'locale': browser_profile_form.locale.data or None,
-            'custom_headers': browser_profile_form.custom_headers.data or '',
-            'is_builtin': False,
-        }
-
-        try:
-            BrowserProfile(**profile_data)
-        except Exception as e:
-            flash(gettext("Browser profile validation error: {}").format(str(e)), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        # Handle rename: remove old key, cascade-update watches and tags
-        if original_machine_name and original_machine_name != machine_name and original_machine_name in store_profiles:
-            del store_profiles[original_machine_name]
-            for watch in datastore.data['watching'].values():
-                if watch.get('browser_profile') == original_machine_name:
-                    watch['browser_profile'] = machine_name
-            for tag in datastore.data.get('settings', {}).get('application', {}).get('tags', {}).values():
-                if tag.get('browser_profile') == original_machine_name:
-                    tag['browser_profile'] = machine_name
-
-        store_profiles[machine_name] = profile_data
-        datastore.commit()
-        flash(gettext("Browser profile '{}' saved.").format(name), 'notice')
-        return redirect(url_for('settings.settings_browsers.index'))
-
-    @settings_browser_profile_blueprint.route("/<string:machine_name>/delete", methods=['GET'])
-    @login_optionally_required
-    def delete(machine_name):
-        from changedetectionio.model.browser_profile import RESERVED_MACHINE_NAMES
-
-        if machine_name in RESERVED_MACHINE_NAMES:
-            flash(gettext("Built-in browser profiles cannot be deleted."), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
-        if machine_name not in store_profiles:
-            flash(gettext("Browser profile not found."), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        raw = store_profiles[machine_name]
-        profile_name = raw.get('name', machine_name) if isinstance(raw, dict) else machine_name
-
-        for watch in datastore.data['watching'].values():
-            if watch.get('browser_profile') == machine_name:
-                watch['browser_profile'] = None
-
-        for tag in datastore.data.get('settings', {}).get('application', {}).get('tags', {}).values():
-            if tag.get('browser_profile') == machine_name:
-                tag['browser_profile'] = None
-
-        if datastore.data['settings']['application'].get('browser_profile') == machine_name:
-            datastore.data['settings']['application']['browser_profile'] = None
-
-        del store_profiles[machine_name]
-        datastore.commit()
-        flash(gettext("Browser profile '{}' deleted.").format(profile_name), 'notice')
-        return redirect(url_for('settings.settings_browsers.index'))
-
-    @settings_browser_profile_blueprint.route("/set-default", methods=['POST'])
-    @login_optionally_required
-    def set_default():
-        from changedetectionio import content_fetchers as cf
-
-        machine_name = request.form.get('machine_name', '').strip()
-        if not machine_name:
-            flash(gettext("No profile specified."), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        from changedetectionio.model.browser_profile import get_profile
-        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
-        if get_profile(machine_name, store_profiles) is None:
-            flash(gettext("Unknown browser profile '{}'.").format(machine_name), 'error')
-            return redirect(url_for('settings.settings_browsers.index'))
-
-        datastore.data['settings']['application']['browser_profile'] = machine_name
-        datastore.commit()
-        flash(gettext("Default browser profile set to '{}'.").format(machine_name), 'notice')
-        return redirect(url_for('settings.settings_browsers.index'))
-
-    return settings_browser_profile_blueprint
@@ -1,163 +0,0 @@
-{% extends 'base.html' %}
-{% block content %}
-{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
-
-<div class="edit-form">
-    <div class="box-wrap inner">
-        <h2>{{ _('Browser Profiles') }}</h2>
-        <p>{{ _('Create named profiles to configure browser settings — viewport size, connection URL, image/font blocking, and more. Each profile is based on an available browser type.') }}</p>
-
-        <form id="set-default-form" action="{{ url_for('settings.settings_browsers.set_default') }}" method="POST">
-            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
-            <input type="hidden" name="machine_name" id="default-machine-name" value="">
-        </form>
-        {% if browser_profiles %}
-        <table class="pure-table pure-table-striped" style="width:100%; margin-bottom:1.5em;">
-            <thead>
-                <tr>
-                    <th style="width:2.5em; text-align:center;" title="{{ _('System default') }}">{{ _('Default') }}</th>
-                    <th>{{ _('Name') }}</th>
-                    <th>{{ _('Type') }}</th>
-                    <th style="width:3em; text-align:center;"></th>
-                    <th>{{ _('Viewport') }}</th>
-                    <th>{{ _('Options') }}</th>
-                    <th></th>
-                </tr>
-            </thead>
-            <tbody>
-            {% for machine_name, profile in browser_profiles.items() %}
-                <tr>
-                    <td style="text-align:center;">
-                        <input type="radio"
-                               name="default_profile"
-                               value="{{ machine_name }}"
-                               title="{{ _('Set as system default') }}"
-                               {% if machine_name == current_default_profile %}checked{% endif %}
-                               onchange="setDefaultProfile('{{ machine_name }}')">
-                    </td>
-                    <td>{{ profile.name }}</td>
-                    <td><code>{{ profile.fetch_backend }}</code></td>
-                    <td style="text-align:center;">{{ profile.get_fetcher_class_name()|fetcher_status_icons }}</td>
-                    <td>{{ profile.viewport_width }}×{{ profile.viewport_height }}</td>
-                    <td style="font-size:0.8em; line-height:1.6;">
-                        {% if profile.block_images %}{{ _('No images') }}<br>{% endif %}
-                        {% if profile.block_fonts %}{{ _('No fonts') }}<br>{% endif %}
-                        {% if profile.ignore_https_errors %}{{ _('Ignore TLS') }}<br>{% endif %}
-                        {% if profile.browser_connection_url %}<span title="{{ profile.browser_connection_url }}">{{ _('Custom URL') }}</span>{% endif %}
-                    </td>
-                    <td style="white-space:nowrap;">
-                        {% if not profile.is_builtin %}
-                        <a href="{{ url_for('settings.settings_browsers.edit', machine_name=machine_name) }}"
-                           class="pure-button button-small">{{ _('Edit') }}</a>
-                        <a href="{{ url_for('settings.settings_browsers.delete', machine_name=machine_name) }}"
-                           class="pure-button button-small button-error"
-                           onclick="return confirm('{{ _('Delete this browser profile?') }}')">{{ _('Delete') }}</a>
-                        {% endif %}
-                    </td>
-                </tr>
-            {% endfor %}
-            </tbody>
-        </table>
-        {% else %}
-        <p style="color:#888; font-style:italic;">{{ _('No browser profiles configured yet. Add one below.') }}</p>
-        {% endif %}
-
-        <div class="border-fieldset">
-            <h3 id="profile-form-heading">{{ _('Edit browser profile') if editing_machine_name else _('Add new browser profile') }}</h3>
-            {% if not editing_machine_name %}
-            <p style="font-size:0.9em; color:#666;">{{ _('Choose a browser type, give it a name, and configure its settings. You can create multiple profiles from the same type with different connection URLs or options.') }}</p>
-            {% endif %}
-            <form class="pure-form pure-form-stacked"
-                  id="browser-profile-form"
-                  action="{{ url_for('settings.settings_browsers.save') }}"
-                  method="POST">
-                <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
-                <input type="hidden" name="original_machine_name" id="original_machine_name" value="{{ editing_machine_name or '' }}">
-                <fieldset>
-                    <div class="pure-control-group">
-                        {{ render_field(browser_profile_form.name) }}
-                    </div>
-                    <div class="pure-control-group inline-radio">
-                        {{ render_field(browser_profile_form.fetch_backend, id="profile-fetch-backend") }}
-                    </div>
-                    <div class="pure-control-group browser-only-field cdp-only-field">
-                        {{ render_field(browser_profile_form.browser_connection_url) }}
-                        <span class="pure-form-message-inline">{{ _('Optional — override the system CDP/WebSocket URL for this profile only (e.g.') }} <code>ws://my-chrome:3000</code>).</span>
-                    </div>
-                    <div class="pure-control-group browser-only-field" style="display:flex; gap:1em; flex-wrap:wrap;">
-                        <div>{{ render_field(browser_profile_form.viewport_width) }}</div>
-                        <div>{{ render_field(browser_profile_form.viewport_height) }}</div>
-                    </div>
-                    <div class="pure-control-group browser-only-field">
-                        {{ render_checkbox_field(browser_profile_form.block_images) }}
-                        <span class="pure-form-message-inline">{{ _('Block image downloads — speeds up loads on image-heavy pages.') }}</span>
-                    </div>
-                    <div class="pure-control-group browser-only-field">
-                        {{ render_checkbox_field(browser_profile_form.block_fonts) }}
-                        <span class="pure-form-message-inline">{{ _('Block web font downloads.') }}</span>
-                    </div>
-                    <div class="pure-control-group browser-only-field">
-                        {{ render_checkbox_field(browser_profile_form.ignore_https_errors) }}
-                        <span class="pure-form-message-inline">{{ _('Ignore TLS/HTTPS certificate errors (useful for self-signed certs on staging sites).') }}</span>
-                    </div>
-                    <div class="pure-control-group browser-only-field">
-                        {{ render_field(browser_profile_form.user_agent) }}
-                        <span class="pure-form-message-inline">{{ _("Leave blank to use the fetcher's default User-Agent.") }}</span>
-                    </div>
-                    <div class="pure-control-group browser-only-field">
-                        {{ render_field(browser_profile_form.locale) }}
-                        <span class="pure-form-message-inline">{{ _('Sets Accept-Language and navigator.language (e.g. en-US, de-DE).') }}</span>
-                    </div>
-                    <div class="pure-control-group">
-                        {{ render_field(browser_profile_form.custom_headers) }}
-                        <span class="pure-form-message-inline">{{ _('Extra HTTP headers for all requests using this profile (one per line, Key: Value). Applied before per-watch headers.') }}</span>
-                    </div>
-                    <div class="pure-control-group">
-                        <button type="submit" class="pure-button pure-button-primary" id="profile-submit-btn">{{ _('Save profile') }}</button>
-                        {% if editing_machine_name %}
-                        <a href="{{ url_for('settings.settings_browsers.index') }}" class="pure-button button-cancel">{{ _('Cancel') }}</a>
-                        {% endif %}
-                        <a href="{{ url_for('settings.settings_page') }}" class="pure-button button-cancel">{{ _('Back to Settings') }}</a>
-                    </div>
-                </fieldset>
-            </form>
-        </div>
-    </div>
-</div>
-
-<script>
-function setDefaultProfile(machineName) {
-    document.getElementById('default-machine-name').value = machineName;
-    document.getElementById('set-default-form').submit();
-}
-
-const fetcherSupportsBrowser = {{ fetcher_supports_screenshots | tojson }};
-const fetcherRequiresConnectionUrl = {{ fetcher_requires_connection_url | tojson }};
-
-function updateBrowserFieldVisibility() {
-    const fetchBackend = document.getElementById('profile-fetch-backend').value;
-    const isBrowser = !!fetcherSupportsBrowser[fetchBackend];
-    const isCdp = !!fetcherRequiresConnectionUrl[fetchBackend];
-    document.querySelectorAll('.browser-only-field').forEach(function(el) {
-        el.style.display = isBrowser ? '' : 'none';
-    });
-    document.querySelectorAll('.cdp-only-field').forEach(function(el) {
-        el.style.display = isCdp ? '' : 'none';
-    });
-}
-
-document.addEventListener('DOMContentLoaded', function() {
-    const sel = document.getElementById('profile-fetch-backend');
-    if (sel) {
-        sel.addEventListener('change', updateBrowserFieldVisibility);
-        updateBrowserFieldVisibility();
-    }
-});
-
-{% if editing_machine_name %}
-document.addEventListener('DOMContentLoaded', function() {
-    document.getElementById('browser-profile-form').scrollIntoView({behavior: 'smooth'});
-});
-{% endif %}
-</script>
-{% endblock %}
@@ -0,0 +1,269 @@
+import json
+import logging
+import os
+import re
+
+from flask import Blueprint, jsonify, redirect, url_for, flash
+from flask_babel import gettext
+from loguru import logger
+
+from changedetectionio.store import ChangeDetectionStore
+from changedetectionio.auth_decorator import login_optionally_required
+
+
+class _LiteLLMWarningCapture(logging.Handler):
+    """In-memory logging handler that records WARNING-and-above messages.
+
+    Meant to be attached to the stdlib 'LiteLLM' logger around a single call.
+    litellm.get_valid_models() catches HTTP/auth errors internally, logs a
+    warning and returns [], so the logged text is the only way to tell the
+    user why nothing came back (bad key, offline, or a genuinely empty list).
+
+    Attributes:
+        messages: formatted log messages, in the order they were emitted.
+    """
+
+    def __init__(self):
+        logging.Handler.__init__(self, level=logging.WARNING)
+        self.messages = []
+
+    def emit(self, record):
+        # Best effort: a record that cannot be formatted is dropped instead of
+        # letting the failure escape into the logging machinery.
+        try:
+            formatted = record.getMessage()
+            self.messages.append(formatted)
+        except Exception:
+            return
+
+
+def _humanize_litellm_error(raw: str) -> str:
+    """Reduce a raw litellm warning to a short message suitable for the UI.
+
+    litellm warnings typically look like:
+      "Error getting valid models: Failed to get models: { 'error': { 'message': '...' } }"
+    The inner provider message is returned when one can be extracted;
+    otherwise the known boilerplate prefixes are trimmed from the raw text.
+
+    Args:
+        raw: the warning text captured from the litellm logger.
+
+    Returns:
+        The extracted or trimmed message, capped at 500 characters. A falsy
+        ``raw`` is returned unchanged.
+    """
+    import ast  # local import: only needed for the Python-repr fallback below
+
+    if not raw:
+        return raw
+    m = re.search(r'\{.*\}', raw, re.DOTALL)
+    if m:
+        body = None
+        # The payload may be real JSON or, as in the example above, a
+        # single-quoted Python dict repr that json.loads rejects - try both.
+        for parse in (json.loads, ast.literal_eval):
+            try:
+                body = parse(m.group(0))
+                break
+            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
+                continue
+        if isinstance(body, dict):
+            err = body.get('error')
+            if isinstance(err, dict):
+                inner = err.get('message')
+            elif isinstance(err, str):
+                # Payload shaped like {"error": "plain text"}.
+                inner = err
+            else:
+                inner = None
+            inner = inner or body.get('message')
+            if inner:
+                # Same cap as the fallback path so the UI never gets a huge blob.
+                return str(inner)[:500]
+    cleaned = re.sub(r'^Error getting valid models:\s*', '', raw)
+    cleaned = re.sub(r'^Failed to get models:\s*', '', cleaned).strip()
+    return cleaned[:500]
+
+
+def construct_llm_blueprint(datastore: ChangeDetectionStore):
+    llm_blueprint = Blueprint('llm', __name__)
+
+    @llm_blueprint.route("/models", methods=['GET'])
+    @login_optionally_required
+    def llm_get_models():
+        """Return the model names litellm reports for a provider, as JSON.
+
+        Query parameters: ``provider`` (required), ``api_key`` and ``api_base``
+        (both optional).
+
+        Responds with ``{'models': [...], 'error': None}`` on success. Responds
+        with ``{'models': [], 'error': <text>}`` and HTTP 400 when no provider
+        is given, when api_base fails is_llm_api_base_safe(), when the stored
+        key would have to be sent to a non-matching api_base, when litellm
+        returned no models but logged a warning, or when any exception is raised.
+        """
+        from flask import request
+        from changedetectionio.validate_url import is_llm_api_base_safe
+        provider = request.args.get('provider', '').strip()
+        api_key  = request.args.get('api_key',  '').strip()
+        api_base = request.args.get('api_base', '').strip()
+
+        logger.debug(f"LLM model list requested for provider={provider!r} api_base={api_base!r}")
+
+        if not provider:
+            logger.debug("LLM model list: no provider specified, returning 400")
+            return jsonify({'models': [], 'error': 'No provider specified'}), 400
+
+        ok, reason = is_llm_api_base_safe(api_base)
+        if not ok:
+            logger.warning(f"LLM model list refused: api_base failed SSRF check ({reason})")
+            return jsonify({'models': [], 'error': reason}), 400
+
+        # Credential-exfiltration guard (GHSA-g36r-fm2p-87xm).
+        # Only substitute the stored api_key when api_base matches the stored
+        # api_base. If the caller pointed at a different destination, refuse —
+        # otherwise a CSRF / unauthenticated request can ship the operator's
+        # long-lived provider key (sent as Authorization: Bearer …) to an
+        # attacker-controlled URL.
+        # NOTE(review): when both api_base values are empty the stored key is
+        # used with whatever provider the caller named — confirm that is intended.
+        stored_llm     = datastore.data['settings']['application'].get('llm') or {}
+        stored_api_base = (stored_llm.get('api_base') or '').strip()
+        if not api_key:
+            if api_base == stored_api_base:
+                api_key = (stored_llm.get('api_key') or '')
+                logger.debug("LLM model list: no api_key in request, using stored key (api_base matches saved)")
+            elif api_base:
+                logger.warning("LLM model list refused: api_base differs from saved config but no api_key supplied")
+                return jsonify({'models': [], 'error': gettext(
+                    "api_key is required when api_base differs from the saved configuration. "
+                    "Refusing to send the stored API key to a different endpoint."
+                )}), 400
+
+        # Model-name prefix added to every returned model for these providers;
+        # providers not listed get no prefix.
+        _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/',
+                     'openai_compatible': 'openai/'}
+        # vLLM / LM Studio / llama.cpp speak OpenAI's wire format — route through litellm's
+        # 'openai' provider but keep the UI-level name distinct from cloud OpenAI.
+        _LITELLM_PROVIDER = {'openai_compatible': 'openai'}
+        prefix = _PREFIXES.get(provider, '')
+        litellm_provider = _LITELLM_PROVIDER.get(provider, provider)
+
+        try:
+            import litellm
+            logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} (litellm={litellm_provider!r}) api_base={api_base!r}")
+
+            # Attach a temporary handler so warnings logged on the 'LiteLLM'
+            # logger during the call can be reported back; always detach it.
+            capture = _LiteLLMWarningCapture()
+            litellm_logger = logging.getLogger('LiteLLM')
+            litellm_logger.addHandler(capture)
+            try:
+                raw = litellm.get_valid_models(
+                    check_provider_endpoint=True,
+                    custom_llm_provider=litellm_provider,
+                    api_key=api_key or None,
+                    api_base=api_base or None,
+                ) or []
+            finally:
+                litellm_logger.removeHandler(capture)
+
+            # De-duplicate, add the provider prefix where it is missing, sort.
+            models = sorted({(m if m.startswith(prefix) else prefix + m) for m in raw})
+
+            # An empty list plus a captured warning is treated as a failure and
+            # the most recent warning is surfaced as the error text.
+            if not models and capture.messages:
+                err = _humanize_litellm_error(capture.messages[-1])
+                logger.debug(f"LLM model list: 0 models, surfacing captured litellm warning: {err!r}")
+                return jsonify({'models': [], 'error': err}), 400
+
+            logger.debug(f"LLM model list: got {len(models)} models for provider={provider!r}")
+            return jsonify({'models': models, 'error': None})
+        except Exception as e:
+            logger.error(f"LLM model list failed for provider={provider!r}: {e}")
+            logger.exception("LLM model list full traceback:")
+            return jsonify({'models': [], 'error': str(e)}), 400
+
+    @llm_blueprint.route("/test", methods=['GET'])
+    @login_optionally_required
+    def llm_test():
+        """Send a one-line test prompt to the configured model and report the result.
+
+        Query parameters (all optional, each falls back to the stored config):
+        ``model``, ``api_key``, ``api_base``, ``provider_kind``,
+        ``local_token_multiplier``.
+
+        Responds with ``{'ok': True, 'text': <reply>, 'tokens': <total>}`` on
+        success. Responds with ``{'ok': False, 'error': <text>}`` and HTTP 400
+        when no model is configured, api_base fails is_llm_api_base_safe(), the
+        stored key would be sent to a different api_base, the model returns
+        empty content, or the completion call raises.
+        """
+        from flask import request
+        from changedetectionio.llm.client import completion
+        from changedetectionio.validate_url import is_llm_api_base_safe
+
+        def _text(value):
+            # A stored setting can be present with an explicit None value, in
+            # which case dict.get(key, '') still returns None; normalise here
+            # so .strip() cannot raise before the error handling below.
+            return (value or '').strip()
+
+        # Pull stored config as the fallback, then override with anything the
+        # form-driven JS sent as query params. Lets users test config changes
+        # without first hitting Save (matching how /settings/llm/models works).
+        stored = datastore.data['settings']['application'].get('llm') or {}
+        # Keep the raw request-supplied values around so we can detect whether
+        # the caller explicitly steered api_base / api_key (credential-exfil guard below).
+        req_api_key  = _text(request.args.get('api_key'))
+        req_api_base = _text(request.args.get('api_base'))
+        stored_api_base = _text(stored.get('api_base'))
+        llm_cfg = {
+            'model':                   _text(request.args.get('model')         or stored.get('model')),
+            'api_key':                 _text(req_api_key                       or stored.get('api_key')),
+            'api_base':                req_api_base or stored_api_base,
+            'provider_kind':           _text(request.args.get('provider_kind') or stored.get('provider_kind')),
+            'local_token_multiplier':  request.args.get('local_token_multiplier') or stored.get('local_token_multiplier'),
+        }
+        model    = llm_cfg['model']
+        api_base = llm_cfg['api_base']
+
+        logger.debug(
+            f"LLM connection test requested: model={model!r} api_base={api_base!r} "
+            f"provider_kind={llm_cfg['provider_kind']!r} "
+            f"source={'form' if request.args.get('model') else 'datastore'}"
+        )
+
+        if not model:
+            logger.error("LLM connection test failed: no model configured")
+            return jsonify({'ok': False, 'error': 'No model configured.'}), 400
+
+        ok, reason = is_llm_api_base_safe(api_base)
+        if not ok:
+            logger.warning(f"LLM connection test refused: api_base failed SSRF check ({reason})")
+            return jsonify({'ok': False, 'error': reason}), 400
+
+        # Credential-exfiltration guard (GHSA-g36r-fm2p-87xm).
+        # If the caller specified an api_base that differs from the saved one but
+        # did NOT supply a matching api_key, refuse to substitute the stored key.
+        # Otherwise a CSRF / unauthenticated request can route the operator's
+        # long-lived provider key to an attacker-controlled endpoint.
+        if req_api_base and req_api_base != stored_api_base and not req_api_key:
+            logger.warning("LLM connection test refused: api_base differs from saved config but no api_key supplied")
+            return jsonify({'ok': False, 'error': gettext(
+                "api_key is required when api_base differs from the saved configuration. "
+                "Refusing to send the stored API key to a different endpoint."
+            )}), 400
+
+        try:
+            logger.debug(f"LLM connection test: sending test prompt to model={model!r}")
+            # Reuse the same multiplier path the production calls use, so cloud providers
+            # stay on a small base cap (matching upstream's pre-existing behavior) and only
+            # reasoning-capable endpoints (Ollama, openai_compatible) opt into the extra
+            # headroom needed for chain-of-thought to complete.
+            # Timeout: omit the override so the test inherits DEFAULT_TIMEOUT (60s, tunable
+            # via LLM_TIMEOUT). A shorter test-only timeout falsely fails on cold-starting
+            # cloud reasoning models (e.g. ollama.com hosting qwen3.5:397b takes ~60s on
+            # first hit) even though the same call succeeds in production.
+            from changedetectionio.llm.evaluator import apply_local_token_multiplier, get_llm_settings
+            text, total_tokens, input_tokens, output_tokens = completion(
+                model=model,
+                messages=[{'role': 'user', 'content':
+                    'Respond with just the word: ready'}],
+                api_key=llm_cfg.get('api_key') or None,
+                api_base=api_base or None,
+                max_tokens=apply_local_token_multiplier(200, llm_cfg),
+                debug=get_llm_settings(datastore).debug,
+            )
+            # Treat missing content (None) like empty content so the user gets
+            # the explicit message below instead of an AttributeError string.
+            reply = (text or '').strip()
+            if not reply:
+                logger.warning(
+                    f"LLM connection test: model={model!r} responded but returned empty content "
+                    f"tokens={total_tokens} (in={input_tokens} out={output_tokens}) — "
+                    f"check finish_reason in client debug log above"
+                )
+                return jsonify({'ok': False, 'error': 'Model responded but returned empty content — check server logs.'}), 400
+
+            logger.success(
+                f"LLM connection test OK: model={model!r} "
+                f"tokens={total_tokens} (in={input_tokens} out={output_tokens}) "
+                f"reply={reply!r}"
+            )
+            return jsonify({'ok': True, 'text': reply, 'tokens': total_tokens})
+
+        except Exception as e:
+            logger.error(f"LLM connection test FAILED: model={model!r} api_base={api_base!r} error={e}")
+            logger.exception("LLM connection test full traceback:")
+            return jsonify({'ok': False, 'error': str(e)}), 400
+
+    # Both clear endpoints accept POST only — GET would let an attacker fire them via
+    # <img src="...">, wiping LLM configuration / cached summaries on a logged-in
+    # operator's browser (GHSA-g36r-fm2p-87xm). Flask-WTF CSRFProtect enforces a
+    # CSRF token on POST automatically; the template renders csrf_token() inside the
+    # surrounding <form>.
+    @llm_blueprint.route("/clear", methods=['POST'])
+    @login_optionally_required
+    def llm_clear():
+        """Drop the LLM connection fields from the saved settings, then redirect.
+
+        The stored 'llm' dict is validated through LLMSettings and written back
+        without the fields named in LLMSettings.CONNECTION_FIELDS; everything
+        else in the model is kept. Commits the datastore, flashes a notice and
+        redirects to the '#ai' anchor of the settings page.
+        """
+        from changedetectionio.model.LLMSettings import LLMSettings
+        logger.debug("LLM configuration cleared by user")
+
+        app_settings = datastore.data['settings']['application']
+        current = LLMSettings.model_validate(app_settings.get('llm') or {})
+        connection_fields = set(LLMSettings.CONNECTION_FIELDS)
+        # Re-dump the validated settings minus the connection fields, so the
+        # saved dict no longer carries model/api_key/api_base/etc.
+        app_settings['llm'] = current.model_dump(exclude=connection_fields)
+
+        datastore.commit()
+        flash(gettext("AI / LLM configuration removed."), 'notice')
+        settings_url = url_for('settings.settings_page')
+        return redirect(settings_url + '#ai')
+
+    @llm_blueprint.route("/clear-summary-cache", methods=['POST'])
+    @login_optionally_required
+    def llm_clear_summary_cache():
+        """Delete every 'change-summary-*.txt' file in each watch's data directory.
+
+        Watches without a data_dir are skipped. A file that cannot be removed
+        is logged and skipped; it does not abort the run. Flashes the number of
+        files removed and redirects to the '#ai' anchor of the settings page.
+        """
+        import glob
+        count = 0
+        for watch in datastore.data['watching'].values():
+            if not watch.data_dir:
+                continue
+            # Escape the directory part: a data path containing glob
+            # metacharacters ([, ], *, ?) would otherwise be read as a pattern
+            # and silently match nothing.
+            pattern = os.path.join(glob.escape(watch.data_dir), 'change-summary-*.txt')
+            for f in glob.glob(pattern):
+                try:
+                    os.remove(f)
+                    logger.info(f"LLM summary cache removed: {f}")
+                    count += 1
+                except OSError as e:
+                    logger.warning(f"Could not remove LLM summary cache file {f}: {e}")
+        logger.info(f"LLM summary cache cleared: {count} file(s) removed")
+        flash(gettext("AI summary cache cleared ({} file(s) removed).").format(count), 'notice')
+        return redirect(url_for('settings.settings_page') + '#ai')
+
+    return llm_blueprint
@@ -9,6 +9,7 @@
    const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
 {% endif %}
 </script>
+<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
 <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
@@ -23,17 +24,19 @@
            <li class="tab"><a href="#fetching">{{ _('Fetching') }}</a></li>
            <li class="tab"><a href="#filters">{{ _('Global Filters') }}</a></li>
            <li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
-            <li class="tab"><a href="#api">{{ _('API') }}</a></li>
-            <li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
+            <li class="tab"><a href="#api">API</a></li>
+            <li class="tab"><a href="#rss">RSS</a></li>
            <li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
            <li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
            <li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
-            <li class="tab"><a href="{{ url_for('settings.settings_browsers.index') }}">{{ _('Browsers') }}</a></li>
            {% if plugin_tabs %}
                {% for tab in plugin_tabs %}
            <li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li>
                {% endfor %}
            {% endif %}
+            {% if not llm_features_disabled %}
+            <li class="tab"><a href="#ai">{{ _('AI / LLM') }}</a></li>
+            {% endif %}
            <li class="tab"><a href="#info">{{ _('Info') }}</a></li>
        </ul>
    </div>
@@ -57,7 +60,7 @@
                        {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
                        <span class="pure-form-message-inline">{{ _('After this many consecutive times that the CSS/xPath filter is missing, send a notification') }}
                            <br>
-                        {{ _('Set to') }} <strong>0</strong> {{ _('to disable') }}
+                        {{ _('Set to <strong>0</strong> to disable')|safe }}
                        </span>
                    </div>
                    <div class="pure-control-group">
@@ -116,11 +119,18 @@
            </div>

            <div class="tab-pane-inner" id="fetching">
-                <fieldset class="pure-group" id="webdriver-override-options">
+                <div class="pure-control-group inline-radio">
+                    {{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
+                    <span class="pure-form-message-inline">
+                        <p>{{ _('Use the <strong>Basic</strong> method (default) where your watched sites don\'t need Javascript to render.')|safe }}</p>
+                        <p>{{ _('The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.')|safe }}</p>
+                    </span>
+                </div>
+                <fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
                    <div class="pure-form-message-inline">
                        <strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
                        <br>
-                        {{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
+                        {{ _('This will wait <i>n</i> seconds before extracting the text.')|safe }}
                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.application.form.webdriver_delay) }}
@@ -140,6 +150,13 @@
                    {{ render_field(form.requests.form.timeout) }}
                    <span class="pure-form-message-inline">{{ _('For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.') }}</span><br>
                </div>
+                <div class="pure-control-group inline-radio">
+                    {{ render_field(form.requests.form.default_ua) }}
+                    <span class="pure-form-message-inline">
+                        {{ _('Applied to all requests.') }}<br><br>
+                        {{ _('Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it\'s important to consider') }} <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">{{ _('all of the ways that the browser is detected') }}</a>.
+                    </span>
+                </div>
                <div class="pure-control-group">
                <br>
                    {{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
@@ -182,7 +199,7 @@ nav
                    <span class="pure-form-message-inline">{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</span><br>
                    <span class="pure-form-message-inline">
                        <ul>
-                            <li>{{ _('Matching text will be') }} <strong>{{ _('ignored') }}</strong> {{ _('in the text snapshot (you can still see it but it wont trigger a change)') }}</li>
+                            <li>{{ _('Matching text will be ignored in the text snapshot (you can still see it but it wont trigger a change)') }}</li>
                            <li>{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</li>
                            <li>{{ _('Each line processed separately, any line matching will be ignored (removed before creating the checksum)') }}</li>
                            <li>{{ _('Regular Expression support, wrap the entire line in forward slash') }} <code>/regex/</code></li>
@@ -250,7 +267,7 @@ nav
                    </div>
                    <div>
                       {{ render_field(form.application.form.rss_template_override) }}
-                        {{ show_token_placeholders(extra_notification_token_placeholder_info=extra_notification_token_placeholder_info, suffix="-rss") }}
+                        {{ show_token_placeholders(extra_notification_token_placeholder_info=extra_notification_token_placeholder_info, suffix="-rss", settings_application=settings_application) }}
                    </div>
                </div>
                <br>
@@ -379,6 +396,9 @@ nav
            </div>
                {% endfor %}
            {% endif %}
+            {% if not llm_features_disabled %}
+            {% include 'settings_llm_tab.html' %}
+            {% endif %}
            <div class="tab-pane-inner" id="info">
                <p><strong>{{ _('Uptime:') }}</strong> {{ uptime_seconds|format_duration }}</p>
                <p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
@@ -0,0 +1,611 @@
+{% from '_helpers.html' import render_field %}
+{% from '_stab.html' import stab_shell, stab_pane %}
+{#
+  AI / LLM settings tab content — included from settings.html.
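+  Requires template context: form, llm_config, llm_env_configured.
+  The Usage pane additionally reads: llm_stored, llm_show_costs, llm_token_budget_month,
+  llm_token_budget_month_env, llm_max_input_chars_env, llm_effective_max_input_chars.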
+#}
+<div class="tab-pane-inner" id="ai">
+<script src="{{ url_for('static_content', group='js', filename='sub-tabs.js') }}"></script>
+
+{# TRANSLATORS: 'Usage' here means token consumption/cost stats for the AI provider, not a how-to guide #}
+{% set _usage_label = pgettext('AI usage stats', 'Usage') %}
+{% call stab_shell('ai-settings', [
+    {'id': 'overview',  'label': _('Overview'),  'icon': '✦'},
+    {'id': 'provider',  'label': _('Provider'),  'icon': '⚙'},
+    {'id': 'prompts',   'label': _('Prompts'),   'icon': '≡'},
+    {'id': 'behaviour', 'label': _('Behaviour'), 'icon': '⚑'},
+    {'id': 'usage',     'label': _usage_label,   'icon': '$'},
+]) %}
+
+  {# ── Overview ──────────────────────────────────────────────────────────── #}
+  {% call stab_pane('overview') %}
+  <div class="stab-overview-hero">
+    <h3><span class="stab-overview-glyph">✦</span> {{ _('AI-powered change monitoring') }}</h3>
+    <p>{{ _('Connect an LLM to move from "something changed" to "only the thing you care about changed".') }}</p>
+  </div>
+
+  <div class="stab-overview-features">
+    <div class="stab-overview-feature">
+      <div class="stab-overview-icon">⊞</div>
+      <div class="stab-overview-text">
+        <strong>{{ _('Intent filtering') }}</strong>
+        <p>{{ _('Each watch or tag can carry a plain-text intent — %(ex1)s or %(ex2)s. On every detected change the AI evaluates the diff against it and suppresses irrelevant noise.', ex1='<strong>"notify me only when the price drops"</strong>', ex2='<strong>"alert when the item goes out of stock"</strong>') | safe }}</p>
+        <p><small>{{ _('Tip: intent evaluation benefits from a capable model — recommended %(local)s locally, or %(gpt)s / %(gemini)s. Very small models (≤3B) may misjudge numeric comparisons.',
+              local='<code>qwen2.5:7b</code>',
+              gpt='<code>gpt-4o-mini</code>',
+              gemini='<code>gemini-2.0-flash</code>') | safe }}</small></p>
+      </div>
+    </div>
+    <div class="stab-overview-feature">
+      <div class="stab-overview-icon">≡</div>
+      <div class="stab-overview-text">
+        <strong>{{ _('AI Change Summary') }}</strong>
+        <p>{{ _('Instead of raw diffs, receive plain-language summaries in notifications — %(ex1)s or %(ex2)s. Set a global default prompt here, or override per watch or tag.', ex1='<strong>"Price dropped from $89 to $67"</strong>', ex2='<strong>"3 new items added to the listing"</strong>') | safe }}</p>
+      </div>
+    </div>
+    <div class="stab-overview-feature">
+      <div class="stab-overview-icon">≈</div>
+      <div class="stab-overview-text">
+        <strong>{{ _('Minimal cost') }}</strong>
+        <p>{{ _('The AI sees only a unified diff of what changed — never full page HTML. Low-cost models like %(gpt)s or %(gemini)s handle this well, typically fractions of a cent per check.',
+              gpt='<a href="https://platform.openai.com/api-keys" target="_blank" rel="noopener">gpt-4o-mini</a>',
+              gemini='<a href="https://aistudio.google.com/apikey" target="_blank" rel="noopener">Gemini Flash</a>') | safe }}</p>
+      </div>
+    </div>
+  </div>
+
+  <div class="stab-overview-cta">
+    {% if llm_config and llm_config.get('model') %}
+    <span class="stab-configured-badge">&#10003; {{ _('AI / LLM configured:') }} {{ llm_config.get('model') }}</span>
+    {% else %}
+    <button type="button" class="pure-button pure-button-primary" data-stab-goto="provider">
+      ⚙ {{ _('Configure AI Provider') }} &rarr;
+    </button>
+    {% endif %}
+  </div>
+  {% endcall %}
+
+  {# ── Provider ──────────────────────────────────────────────────────────── #}
+  {% call stab_pane('provider') %}
+  <p class="stab-section-title">{{ _('AI Provider') }}</p>
+
+  <div class="pure-control-group">
+    <label></label>
+    {{ form.llm.form.enabled() }}
+    <label for="{{ form.llm.form.enabled.id }}" style="display:inline; font-weight:normal;">
+      {{ form.llm.form.enabled.label.text }}
+    </label>
+    <span class="pure-form-message-inline">
+      {{ _('Master switch — when off, all AI lookups are skipped even if a provider is configured below.') }}
+    </span>
+  </div>
+
+  {% if not llm_env_configured and not (llm_config and llm_config.get('model')) %}
+  <div class="stab-overview-disclaimer">
+    <div class="stab-disclaimer-icon">⚠</div>
+    <div class="stab-disclaimer-body">
+      <strong>{{ _('Third-party data transfer — please read') }}</strong>
+      <p>{{ _('When AI features are active, change data from the websites you monitor — including page diffs and extracted text — is sent to an external AI provider of your choice.') }}</p>
+      <ul>
+        <li>{{ _('You are solely responsible for ensuring this complies with the terms of service of each website you monitor.') }}</li>
+        <li>{{ _("You are solely responsible for compliance with applicable data-protection laws (e.g. GDPR) regarding any personal data that may appear in monitored content.") }}</li>
+        <li>{{ _('API costs charged by your chosen provider are your own responsibility; this software has no visibility into or control over those charges.') }}</li>
+        <li>{{ _('AI / LLM models are known to hallucinate — producing plausible-sounding but factually incorrect or entirely fabricated output with apparent confidence — and by design may omit or truncate relevant data during summarisation. AI output must never be relied upon as complete or accurate. This software is provided as-is with no warranty of any kind.') }}</li>
+        <li>{{ _('By enabling AI features you personally indemnify and hold harmless the creator(s) and contributor(s) of this software from any claims, damages, or liability arising from this data transfer or your use of AI features.') }}</li>
+      </ul>
+      <label class="stab-disclaimer-check">
+        <input type="checkbox" id="llm-disclaimer-accept" onchange="llmDisclaimerToggle(this)">
+        <span>{{ _('I have read and understood the above. I accept full responsibility and indemnify the creator(s) of this software.') }}</span>
+      </label>
+    </div>
+  </div>
+  <div id="llm-provider-fields" style="display:none">
+  {% endif %}
+
+  {% if llm_env_configured %}
+  <div class="inline-warning" style="margin-bottom: 1em;">
+    <img class="inline-warning-icon" src="{{ url_for('static_content', group='images', filename='notice.svg') }}" alt="{{ _('Note') }}">
+    {{ _('AI / LLM is configured via environment variables (<code>LLM_MODEL=%(model)s</code>%(api_base)s). Remove the <code>LLM_MODEL</code> environment variable to configure via this form instead.',
+         model=llm_config.get('model', '')|e,
+         api_base=(', <code>LLM_API_BASE=' ~ (llm_config.get('api_base')|e) ~ '</code>') if llm_config.get('api_base') else '') | safe }}
+  </div>
+  {% else %}
+
+  <div class="pure-control-group">
+    <label for="llm-provider">{{ _('Provider') }}</label>
+    <select id="llm-provider" onchange="llmOnProviderChange(this.value)">
+      <option value="">— {{ _('select a provider') }} —</option>
+        <option value="anthropic">Anthropic</option>
+        <option value="gemini">Google (Gemini)</option>
+        <option value="ollama">Ollama</option>
+        <option value="openai">OpenAI</option>
+        <option value="openai_compatible">{{ _('OpenAI-compatible (vLLM, LM Studio, llama.cpp)') }}</option>
+        <option value="openrouter">OpenRouter (200+ models)</option>
+    </select>
+  </div>
+
+  <div class="pure-control-group">
+    {{ render_field(form.llm.form.api_key) }}
+    <span class="pure-form-message-inline" id="llm-key-hint"></span>
+  </div>
+  <div class="pure-control-group" id="llm-base-group" style="display:none">
+    {{ render_field(form.llm.form.api_base) }}
+    <span class="pure-form-message-inline">{{ _('Only needed for Ollama or custom/self-hosted endpoints. Leave blank for cloud providers.') }}</span>
+  </div>
+
+  {# Hidden field carrying the dropdown selection so the backend knows when to apply
+     reasoning-friendly token caps (Ollama and OpenAI-compatible endpoints, which commonly
+     serve reasoning models that need headroom for chain-of-thought to complete). #}
+  {{ form.llm.form.provider_kind() }}
+
+  <div class="pure-control-group" id="llm-local-advanced-group" style="display:none">
+    <label for="{{ form.llm.form.local_token_multiplier.id }}">{{ form.llm.form.local_token_multiplier.label.text }}</label>
+    {{ form.llm.form.local_token_multiplier() }}
+    <span class="pure-form-message-inline">
+      {{ _('Reasoning models (Qwen3, DeepSeek-R1, Gemma 3, etc.) emit chain-of-thought before the final answer. This multiplier scales every <code>max_tokens</code> cap for this endpoint to leave reasoning room. Defaults to %(default)s; raise it if responses come back truncated or empty, lower it (down to 1x) if you want tighter limits on a paid endpoint. Applied to Ollama and OpenAI-compatible endpoints — other cloud providers (OpenAI, Anthropic, Gemini) keep their original tight caps.', default='5x') | safe }}
+    </span>
+  </div>
+
+  <div class="pure-control-group" id="llm-fetch-group" style="display:none">
+    <label></label>
+    <button type="button" id="llm-fetch-btn" class="pure-button button-xsmall" onclick="llmFetchModels()"
+            style="background:#27ae60;color:#fff;border:none;">
+      &#8635; {{ _('Load available models') }}
+    </button>
+    <span id="llm-fetch-status" style="margin-left:.6em;font-size:.85em;color:#888;"></span>
+  </div>
+
+  <div class="pure-control-group" id="llm-model-select-group" style="display:none">
+    <label for="llm-model-select">{{ _('Available models') }}</label>
+    <select id="llm-model-select" class="pure-input-1-2" onchange="llmOnModelPick(this.value)">
+      <option value="">— {{ _('choose a model') }} —</option>
+    </select>
+  </div>
+
+  <div class="pure-control-group">
+    {{ render_field(form.llm.form.model,
+                    placeholder=_("Enter API key and click 'Load available models'")) }}
+  </div>
+
+  {% if llm_config and llm_config.get('model') %}
+  <div class="pure-control-group">
+    <label></label>
+    <span style="color:#4a7c59;font-weight:bold;">
+      &#10003; {{ _('AI / LLM configured:') }} {{ llm_config.get('model') }}
+    </span>
+    &nbsp;
+    {# data-method="POST" tells modal.js to POST with the CSRF token instead of
+       navigating — GET previously allowed <img>-based CSRF wipe (GHSA-g36r-fm2p-87xm).
+       Stays as <a> because we're inside the outer settings <form> — nested forms are
+       invalid HTML, so modal.js builds a body-level hidden form for the POST. #}
+    <a href="{{ url_for('settings.llm.llm_clear') }}"
+       class="pure-button button-xsmall"
+       style="background:#c0392b;color:#fff;"
+       data-method="POST"
+       data-requires-confirm
+       data-confirm-type="danger"
+       data-confirm-title="{{ _('Remove AI / LLM configuration?') }}"
+       data-confirm-message="<p>{{ _('This will remove your saved AI provider, model, and API key.') }}</p>"
+       data-confirm-button="{{ _('Remove') }}"
+       data-cancel-button="{{ _('Cancel') }}">
+      &#10005; {{ _('Remove') }}
+    </a>
+    &nbsp;
+    <button type="button" id="llm-test-btn" class="pure-button button-xsmall" onclick="llmRunTest()"
+            style="background:#2980b9;color:#fff;border:none;">
+      &#9654; {{ _('Test connection') }}
+    </button>
+  </div>
+  <div id="llm-test-result" style="display:none; margin-top:0.6em; padding:0.6em 0.85em; border-radius:5px; font-size:0.88em; line-height:1.45;"></div>
+  {% endif %}
+
+  <p class="pure-form-message-inline" style="margin-top:0.5em;">
+    {{ _("Your API key is stored locally and sent only to your chosen provider. On each detected change, the watch's diff and extracted text are sent to the LLM — no full page HTML.") }}
+  </p>
+
+  <div class="pure-control-group" style="margin-top:1.2em; padding-top:1em; border-top:1px solid rgba(128,128,128,0.15);">
+    <label style="color:#888; font-size:0.85em;">{{ _('Cache') }}</label>
+    {# See comment above on data-method="POST"+modal.js (GHSA-g36r-fm2p-87xm). #}
+    <a href="{{ url_for('settings.llm.llm_clear_summary_cache') }}"
+       class="pure-button button-xsmall"
+       style="background:#7f8c8d;color:#fff;"
+       data-method="POST"
+       data-requires-confirm
+       data-confirm-type="warning"
+       data-confirm-title="{{ _('Clear all summary cache?') }}"
+       data-confirm-message="<p>{{ _('This will remove all cached AI change summaries across all watches.') }}</p><p>{{ _('They will be regenerated on the next check.') }}</p>"
+       data-confirm-button="{{ _('Clear cache') }}"
+       data-cancel-button="{{ _('Cancel') }}">
+      &#10005; {{ _('Clear all summary cache') }}
+    </a>
+    <span class="pure-form-message-inline">{{ _('Removes all cached AI change summaries across all watches. They will be regenerated on the next check.') }}</span>
+  </div>
+
+  <div class="pure-control-group">
+    <label></label>
+    {{ form.llm.form.debug() }}
+    <label for="{{ form.llm.form.debug.id }}" style="display:inline; font-weight:normal;">
+      {{ form.llm.form.debug.label.text }}
+    </label>
+    <span class="pure-form-message-inline">
+      {{ _('Enables litellm verbose output (routed through loguru). Useful when diagnosing provider errors or empty responses. Leave off in production — generates a lot of log volume.') }}
+    </span>
+  </div>
+  {% endif %}{# llm_env_configured #}
+
+  {% if not llm_env_configured and not (llm_config and llm_config.get('model')) %}
+  </div>{# llm-provider-fields #}
+  {% endif %}
+  {% endcall %}
+
+  {# ── Prompts ───────────────────────────────────────────────────────────── #}
+  {% call stab_pane('prompts') %}
+  {# Prompts pane: renders the change_summary_default form field (the global change-summary prompt). #}
+  <p class="stab-section-title">{{ _('Default AI Change Summary') }}</p>
+
+  <div class="pure-control-group">
+    {{ render_field(form.llm.form.change_summary_default) }}
+    <span class="pure-form-message-inline">
+      {{ _('Used for all watches unless overridden by the watch or its tag/group.') }}
+      {# The link below copies the field's placeholder into its value, but only when the field is
+         still empty, so the placeholder text becomes editable. It returns false to stop the
+         "#" navigation. NOTE(review): presumably the placeholder holds the built-in default
+         prompt — confirm against the form definition. #}
+      &nbsp;<a href="#" class="pure-button button-small" onclick="var t=document.getElementById('llm-change_summary_default'); if(!t.value && t.placeholder) t.value=t.placeholder; return false;">{{ _('Modify default prompt') }}</a>
+    </span>
+  </div>
+
+  {% endcall %}
+
+  {# ── Behaviour ─────────────────────────────────────────────────────────── #}
+  {% call stab_pane('behaviour') %}
+  <p class="stab-section-title">{{ _('Behaviour') }}</p>
+
+  {% if llm_config and llm_config.get('model') %}
+  <div class="pure-control-group">
+    <label></label>
+    {{ form.llm.form.override_diff_with_summary() }}
+    <label for="{{ form.llm.form.override_diff_with_summary.id }}" style="display:inline; font-weight:normal;">
+      {{ form.llm.form.override_diff_with_summary.label.text }}
+    </label>
+    <span class="pure-form-message-inline">
+      {{ _('When enabled, the <code>%(diff)s</code> notification token shows the AI summary instead of the raw diff. Use <code>%(raw_diff)s</code> to always get the original.',
+           diff='{{diff}}', raw_diff='{{raw_diff}}') | safe }}
+    </span>
+  </div>
+
+  <div class="pure-control-group">
+    <label></label>
+    {{ form.llm.form.restock_use_fallback_extract() }}
+    <label for="{{ form.llm.form.restock_use_fallback_extract.id }}" style="display:inline; font-weight:normal;">
+      {{ form.llm.form.restock_use_fallback_extract.label.text }}
+    </label>
+    <span class="pure-form-message-inline">
+      {{ _('When enabled, the AI will be used as a last resort to extract price and stock status from product pages where no structured metadata (JSON-LD, microdata, OpenGraph) is found.') }}
+    </span>
+  </div>
+
+  <div class="pure-control-group">
+    <label for="{{ form.llm.form.thinking_budget.id }}">{{ form.llm.form.thinking_budget.label.text }}</label>
+    {{ form.llm.form.thinking_budget() }}
+    <span class="pure-form-message-inline">{{ _('For Gemini 2.5+ models only. Thinking tokens improve reasoning quality but count against the output budget. Set to Off if summaries are being cut short.') }}</span>
+  </div>
+
+  <div class="pure-control-group">
+    <label for="{{ form.llm.form.max_summary_tokens.id }}">{{ form.llm.form.max_summary_tokens.label.text }}</label>
+    {{ form.llm.form.max_summary_tokens() }}
+    <span class="pure-form-message-inline">{{ _('Upper limit on tokens the AI may use when writing a change summary. Higher values allow longer summaries but cost more.') }}</span>
+  </div>
+
+  <div class="pure-control-group">
+    <label>{{ form.llm.form.budget_action.label.text }}</label>
+    <div>
+      {% for subfield in form.llm.form.budget_action %}
+      <label class="pure-radio" style="display:block; font-weight:normal; margin-bottom:0.3em;">
+        {{ subfield() }} {{ subfield.label.text }}
+      </label>
+      {% endfor %}
+    </div>
+  </div>
+  {% else %}
+  <p class="pure-form-message-inline" style="margin-top:0.5em;">
+    {{ _('Configure a provider first to unlock behaviour settings.') }}
+  </p>
+  {% endif %}
+  {% endcall %}
+
+  {# ── Usage ─────────────────────────────────────────────────────────────── #}
+  {% call stab_pane('usage') %}
+  <p class="stab-section-title">{{ _('Token & Cost Tracking') }}</p>
+
+  {% if llm_stored.get('tokens_total_cumulative') or llm_stored.get('tokens_this_month') %}
+
+  <div class="llm-usage-grid">
+    <div class="llm-stat-card">
+      <div class="llm-stat-label">{{ _('This month') }}</div>
+      <div class="llm-stat-value">{{ '{:,}'.format(llm_stored.get('tokens_this_month', 0)) }}</div>
+      <div class="llm-stat-sub">{{ _('tokens') }}{% if llm_show_costs and llm_stored.get('cost_usd_this_month') %} &nbsp;·&nbsp; ≈&thinsp;${{ '%.4f'|format(llm_stored.get('cost_usd_this_month', 0)) }}{% endif %}</div>
+      {% if llm_token_budget_month %}
+      {% set pct = (llm_stored.get('tokens_this_month', 0) / llm_token_budget_month * 100)|int %}
+      <div class="llm-stat-bar-wrap">
+        <div class="llm-stat-bar-fill {% if pct >= 100 %}bar-over{% elif pct >= 80 %}bar-warn{% else %}bar-ok{% endif %}"
+             style="width:{{ [pct, 100]|min }}%"></div>
+      </div>
+      <div class="llm-stat-budget-text">{{ _('%(percent)s%% of %(budget)s', percent=pct, budget='{:,}'.format(llm_token_budget_month)) }}</div>
+      {% endif %}
+    </div>
+
+    <div class="llm-stat-card">
+      <div class="llm-stat-label">{{ _('All-time total') }}</div>
+      <div class="llm-stat-value">{{ '{:,}'.format(llm_stored.get('tokens_total_cumulative', 0)) }}</div>
+      <div class="llm-stat-sub">{{ _('tokens') }}{% if llm_show_costs and llm_stored.get('cost_usd_total_cumulative') %} &nbsp;·&nbsp; ≈&thinsp;${{ '%.4f'|format(llm_stored.get('cost_usd_total_cumulative', 0)) }}{% endif %}</div>
+    </div>
+  </div>
+
+  {% if llm_token_budget_month and llm_stored.get('tokens_this_month', 0) >= llm_token_budget_month %}
+  <p class="llm-budget-alert">&#9888; {{ _('Monthly token budget reached. AI summarisation is paused until next month.') }}</p>
+  {% endif %}
+
+  <div class="llm-usage-settings">
+    <div class="llm-usage-row">
+      <span class="llm-usage-row-label">{{ _('Token budget this period') }}</span>
+      <span class="llm-usage-row-value">
+        {% if llm_token_budget_month_env %}
+          <strong>{{ '{:,}'.format(llm_token_budget_month_env) }}</strong>
+          <span class="llm-env-badge">{{ _('(set via <code>LLM_TOKEN_BUDGET_MONTH</code>)') | safe }}</span>
+          <input type="hidden" name="llm-token_budget_month" value="{{ llm_token_budget_month_env }}">
+        {% else %}
+          {{ form.llm.form.token_budget_month(placeholder=_('0 = unlimited'), value=llm_stored.get('token_budget_month', 0) or '') }}
+          <span class="llm-field-hint">{{ _('tokens (0 = unlimited)') }}</span>
+        {% endif %}
+      </span>
+    </div>
+    {% if llm_stored.get('tokens_month_key') %}
+    <div class="llm-usage-row">
+      <span class="llm-usage-row-label">{{ _('Current billing period') }}</span>
+      <span class="llm-usage-row-value">{{ llm_stored.get('tokens_month_key') }}</span>
+    </div>
+    {% endif %}
+    <div class="llm-usage-row">
+      <span class="llm-usage-row-label">{{ _('Max input characters') }}</span>
+      <span class="llm-usage-row-value">
+        {% if llm_max_input_chars_env %}
+          {{ form.llm.form.max_input_chars(value=llm_max_input_chars_env, readonly=True, style="width:10em;opacity:0.6;cursor:not-allowed;") }}
+          <span class="llm-env-badge">{{ _('(set via <code>LLM_MAX_INPUT_CHARS</code>)') | safe }}</span>
+        {% else %}
+          {{ form.llm.form.max_input_chars(placeholder='100000', value=llm_stored.get('max_input_chars', 100000) or '') }}
+          <span class="llm-field-hint">{{ _('characters — currently enforcing: %(limit)s', limit='{:,}'.format(llm_effective_max_input_chars)) }}</span>
+        {% endif %}
+      </span>
+    </div>
+    <div class="llm-usage-row">
+      <span class="llm-usage-row-label">{{ _('Max tokens per watch per period') }}</span>
+      <span class="llm-usage-row-value">
+        {{ form.llm.form.max_tokens_per_count_period(placeholder=_('0 = unlimited'), value=llm_stored.get('max_tokens_per_count_period', 0) or '') }}
+        <span class="llm-field-hint">{{ _('tokens — skips AI evaluation on a watch once its usage within the current period (monthly) hits this cap (0 = unlimited)') }}</span>
+      </span>
+    </div>
+  </div>
+
+  {% else %}
+  <p class="llm-no-usage">{{ _('No AI usage recorded yet.') }}</p>
+
+  <div class="llm-usage-settings">
+    <div class="llm-usage-row">
+      <span class="llm-usage-row-label">{{ _('Token budget') }}</span>
+      <span class="llm-usage-row-value">
+        {% if llm_token_budget_month_env %}
+          <strong>{{ '{:,}'.format(llm_token_budget_month_env) }}</strong>
+          <span class="llm-env-badge">{{ _('(set via <code>LLM_TOKEN_BUDGET_MONTH</code>)') | safe }}</span>
+          <input type="hidden" name="llm-token_budget_month" value="{{ llm_token_budget_month_env }}">
+        {% else %}
+          {{ form.llm.form.token_budget_month(placeholder=_('0 = unlimited'), value=llm_stored.get('token_budget_month', 0) or '') }}
+          <span class="llm-field-hint">{{ _('tokens per month (0 = unlimited)') }}</span>
+        {% endif %}
+      </span>
+    </div>
+    <div class="llm-usage-row">
+      <span class="llm-usage-row-label">{{ _('Max input characters') }}</span>
+      <span class="llm-usage-row-value">
+        {% if llm_max_input_chars_env %}
+          {{ form.llm.form.max_input_chars(value=llm_max_input_chars_env, readonly=True, style="width:10em;opacity:0.6;cursor:not-allowed;") }}
+          <span class="llm-env-badge">{{ _('(set via <code>LLM_MAX_INPUT_CHARS</code>)') | safe }}</span>
+        {% else %}
+          {{ form.llm.form.max_input_chars(placeholder='100000', value=llm_stored.get('max_input_chars', 100000) or '') }}
+          <span class="llm-field-hint">{{ _('characters — currently enforcing: %(limit)s', limit='{:,}'.format(llm_effective_max_input_chars)) }}</span>
+        {% endif %}
+      </span>
+    </div>
+    <div class="llm-usage-row">
+      <span class="llm-usage-row-label">{{ _('Max tokens per watch per period') }}</span>
+      <span class="llm-usage-row-value">
+        {{ form.llm.form.max_tokens_per_count_period(placeholder=_('0 = unlimited'), value=llm_stored.get('max_tokens_per_count_period', 0) or '') }}
+        <span class="llm-field-hint">{{ _('tokens — skips AI evaluation on a watch once its usage within the current period (monthly) hits this cap (0 = unlimited)') }}</span>
+      </span>
+    </div>
+  </div>
+  {% endif %}
+  {% endcall %}
+
+{% endcall %}{# stab_shell #}
+</div>
+
+<script>
+(function () {
+  const LIVE_PROVIDERS = ['openai', 'anthropic', 'gemini', 'ollama', 'openai_compatible', 'openrouter'];
+  const BASE_DEFAULTS  = { ollama: 'http://localhost:11434' };
+  const KEY_HINTS = {
+    openai:             '{{ _("platform.openai.com → API keys") }}',
+    anthropic:          '{{ _("console.anthropic.com → API keys") }}',
+    gemini:             '{{ _("aistudio.google.com → Get API key") }}',
+    ollama:             '{{ _("No API key needed for local Ollama") }}',
+    openai_compatible:  '{{ _("Bearer token for your self-hosted server (vLLM, LM Studio, etc.)") }}',
+    openrouter:         '{{ _("openrouter.ai → Keys") }}',
+  };
+
+  window.llmDisclaimerToggle = function (cb) {
+    const fields = document.getElementById('llm-provider-fields');
+    if (fields) fields.style.display = cb.checked ? '' : 'none';
+  };
+
+  window.llmOnProviderChange = function (provider) {
+    const fetchGroup    = document.getElementById('llm-fetch-group');
+    const baseGroup     = document.getElementById('llm-base-group');
+    const modelSelGrp   = document.getElementById('llm-model-select-group');
+    const localAdvGrp   = document.getElementById('llm-local-advanced-group');
+    const baseField     = document.querySelector('[name="llm-api_base"]');
+    const kindField     = document.querySelector('[name="llm-provider_kind"]');
+    const hint          = document.getElementById('llm-key-hint');
+
+    fetchGroup.style.display = LIVE_PROVIDERS.includes(provider) ? '' : 'none';
+
+    const needsBase = provider === 'ollama' || provider === 'openai_compatible';
+    baseGroup.style.display = needsBase ? '' : 'none';
+    if (BASE_DEFAULTS[provider] !== undefined) {
+      if (!baseField.value) baseField.value = BASE_DEFAULTS[provider];
+    }
+
+    // Persist the dropdown selection so the backend can branch on provider kind
+    // (self-hosted endpoints — 'ollama' and 'openai_compatible' — trigger the
+    // local-multiplier code path; cloud providers do not).
+    if (kindField) kindField.value = provider || '';
+
+    // Show the local-endpoint advanced settings (token multiplier) for self-hosted
+    // endpoints. Cloud providers get the original tight caps and don't see this
+    // section at all.
+    if (localAdvGrp) localAdvGrp.style.display = (provider === 'ollama' || provider === 'openai_compatible') ? '' : 'none';
+
+    hint.textContent = KEY_HINTS[provider] || '';
+    modelSelGrp.style.display = 'none';
+    document.getElementById('llm-fetch-status').textContent = '';
+  };
+
+  window.llmFetchModels = async function () {
+    const provider  = document.getElementById('llm-provider').value;
+    const apiKey    = document.querySelector('[name="llm-api_key"]').value.trim();
+    const apiBase   = document.querySelector('[name="llm-api_base"]').value.trim();
+    const btn       = document.getElementById('llm-fetch-btn');
+    const statusEl  = document.getElementById('llm-fetch-status');
+    const selGroup  = document.getElementById('llm-model-select-group');
+    const modelSel  = document.getElementById('llm-model-select');
+
+    if (!provider) { statusEl.textContent = '{{ _("Select a provider first.") }}'; return; }
+
+    btn.disabled = true;
+    btn.textContent = '⏳ {{ _("Loading…") }}';
+    statusEl.textContent = '';
+
+    const params = new URLSearchParams({ provider });
+    if (apiKey)  params.set('api_key',  apiKey);
+    if (apiBase) params.set('api_base', apiBase);
+
+    try {
+      const resp = await fetch('{{ url_for("settings.llm.llm_get_models") }}?' + params);
+      const data = await resp.json();
+
+      if (data.error) {
+        statusEl.style.color = '#c0392b';
+        statusEl.textContent = '✗ ' + data.error;
+        selGroup.style.display = 'none';
+        return;
+      }
+
+      if (!data.models || data.models.length === 0) {
+        statusEl.style.color = '#e67e22';
+        statusEl.textContent = '{{ _("No models returned by the provider.") }}';
+        selGroup.style.display = 'none';
+        return;
+      }
+
+      modelSel.innerHTML = '<option value="">{{ _("— choose a model —") }}</option>';
+      const currentModel = document.querySelector('[name="llm-model"]').value.trim();
+      for (const m of data.models) {
+        const opt = document.createElement('option');
+        opt.value = m;
+        opt.textContent = m;
+        if (m === currentModel) opt.selected = true;
+        modelSel.appendChild(opt);
+      }
+
+      selGroup.style.display = '';
+      statusEl.style.color = '#27ae60';
+      statusEl.textContent = '✓ ' + data.models.length + ' {{ _("models available with your key") }}';
+    } catch (e) {
+      statusEl.style.color = '#c0392b';
+      statusEl.textContent = '✗ {{ _("Request failed") }}: ' + e.message;
+    } finally {
+      btn.disabled = false;
+      btn.textContent = '↻ {{ _("Load available models") }}';
+    }
+  };
+
+  // Copy a picked model name into the free-text "llm-model" field.
+  // A falsy value (e.g. an empty option) leaves the field untouched.
+  window.llmOnModelPick = function (value) {
+    if (!value) return;
+    const modelInput = document.querySelector('[name="llm-model"]');
+    modelInput.value = value;
+  };
+
+  // Run a connection test against the LLM test endpoint using the form's
+  // current (possibly unsaved) values and render the outcome in #llm-test-result.
+  // Does nothing when the test button or the result container is not on the page.
+  window.llmRunTest = async function () {
+    const btn    = document.getElementById('llm-test-btn');
+    const result = document.getElementById('llm-test-result');
+    if (!btn || !result) return;
+
+    // Escape server/provider supplied text before it goes into innerHTML.
+    // '&' has to be escaped as well as '<', otherwise entity-looking text in a
+    // provider message is rendered as the entity instead of being shown verbatim.
+    // null/undefined become '' so a response without "text"/"error" cannot throw.
+    const esc = (s) => String(s == null ? '' : s)
+      .replace(/&/g, '&amp;')
+      .replace(/</g, '&lt;')
+      .replace(/>/g, '&gt;')
+      .replace(/"/g, '&quot;')
+      .replace(/'/g, '&#39;');
+
+    // Trimmed value of a named form field, or '' when the field is absent/empty.
+    const fieldValue = (name) => {
+      const el = document.querySelector('[name="' + name + '"]');
+      return ((el && el.value) || '').trim();
+    };
+
+    const okBox   = 'display:block; background:rgba(39,174,96,0.08); border:1px solid rgba(39,174,96,0.3); border-radius:5px; padding:0.6em 0.85em; font-size:0.88em; line-height:1.45;';
+    const failBox = 'display:block; background:rgba(192,57,43,0.07); border:1px solid rgba(192,57,43,0.25); border-radius:5px; padding:0.6em 0.85em; font-size:0.88em; line-height:1.45;';
+
+    btn.disabled = true;
+    btn.textContent = '⏳ {{ _("Testing…") }}';
+    result.style.display = 'none';
+
+    // Send the form's current values so the user doesn't have to hit Save before
+    // testing a config change. Endpoint falls back to the stored datastore values
+    // for any field we don't send.
+    // NOTE(review): the API key travels in the query string of a GET request and
+    // can therefore end up in server/proxy access logs - consider a POST body if
+    // the endpoint can accept one.
+    const params = new URLSearchParams();
+    const model   = fieldValue('llm-model');
+    const apiKey  = fieldValue('llm-api_key');
+    const apiBase = fieldValue('llm-api_base');
+    const kind    = fieldValue('llm-provider_kind');
+    const mult    = fieldValue('llm-local_token_multiplier');
+    if (model)   params.set('model',    model);
+    if (apiKey)  params.set('api_key',  apiKey);
+    if (apiBase) params.set('api_base', apiBase);
+    if (kind)    params.set('provider_kind', kind);
+    if (mult)    params.set('local_token_multiplier', mult);
+
+    try {
+      const resp = await fetch('{{ url_for("settings.llm.llm_test") }}?' + params);
+      const data = await resp.json();
+      if (data.ok) {
+        result.style.cssText = okBox;
+        result.innerHTML = '<span style="color:#27ae60; font-weight:600;">&#10003; {{ _("Connected") }}</span>'
+          + (data.tokens ? ' <span style="opacity:0.55; font-size:0.9em;">(' + esc(data.tokens) + ' {{ _("tokens") }})</span>' : '')
+          + '<br><em style="opacity:0.75;">' + esc(data.text) + '</em>';
+      } else {
+        result.style.cssText = failBox;
+        result.innerHTML = '<span style="color:#c0392b; font-weight:600;">&#10007; {{ _("Failed") }}</span><br><code style="font-size:0.92em; word-break:break-all;">' + esc(data.error) + '</code>';
+      }
+    } catch (e) {
+      // Network failure or a non-JSON response body.
+      result.style.cssText = 'display:block; background:rgba(192,57,43,0.07); border:1px solid rgba(192,57,43,0.25); border-radius:5px; padding:0.6em 0.85em; font-size:0.88em;';
+      result.innerHTML = '<span style="color:#c0392b; font-weight:600;">&#10007; {{ _("Request failed") }}</span>: ' + esc(e && e.message ? e.message : e);
+    } finally {
+      // Always restore the button, whatever the outcome.
+      btn.disabled = false;
+      btn.textContent = '▶ {{ _("Test connection") }}';
+    }
+  };
+
+  // On page load: detect and pre-select provider from current model
+  (function detectCurrentProvider() {
+    const modelInput = document.querySelector('[name="llm-model"]');
+    if (!modelInput) return;
+    const modelName = modelInput.value.trim();
+    if (!modelName) return;
+
+    // openai/<model> + custom api_base = self-hosted OpenAI-compatible (vLLM etc.)
+    // Evaluated lazily so the api_base field is only looked up for "openai/" models.
+    const openaiOrCompatible = () => {
+      const baseInput = document.querySelector('[name="llm-api_base"]');
+      return (baseInput && baseInput.value.trim()) ? 'openai_compatible' : 'openai';
+    };
+
+    // Ordered [prefixes, resolver] rules: the first rule with a matching prefix wins.
+    const rules = [
+      [['gemini/'],         () => 'gemini'],
+      [['ollama/'],         () => 'ollama'],
+      [['openrouter/'],     () => 'openrouter'],
+      [['openai/'],         openaiOrCompatible],
+      [['claude'],          () => 'anthropic'],
+      [['gpt', 'o1', 'o3'], () => 'openai'],
+    ];
+
+    const hit = rules.find((rule) => rule[0].some((prefix) => modelName.startsWith(prefix)));
+    const guessed = hit ? hit[1]() : '';
+    if (!guessed) return;
+
+    // Pre-select the provider and run the same handler a manual change would.
+    const providerSelect = document.getElementById('llm-provider');
+    if (providerSelect) {
+      providerSelect.value = guessed;
+      llmOnProviderChange(guessed);
+    }
+  })();
+}());
+</script>
@@ -5,6 +5,7 @@ from loguru import logger

 from changedetectionio.store import ChangeDetectionStore
 from changedetectionio.flask_app import login_optionally_required
+from changedetectionio.llm.evaluator import get_llm_config as _get_llm_config


 def construct_blueprint(datastore: ChangeDetectionStore):
@@ -22,11 +23,14 @@ def construct_blueprint(datastore: ChangeDetectionStore):

        tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])

+        from changedetectionio import processors
        output = render_template("groups-overview.html",
                                 app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
                                 available_tags=sorted_tags,
                                 form=add_form,
+                                 generate_tag_colors=processors.generate_processor_badge_colors,
                                 tag_count=tag_count,
+                                 wcag_text_color=processors.wcag_text_color,
                                 )

        return output
@@ -180,6 +184,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            'form': form,
            'watch': default,
            'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
+            'llm_configured': bool(_get_llm_config(datastore)),
        }

        included_content = {}
@@ -208,9 +213,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            template = env.from_string(template_str)
            included_content = template.render(**template_args)

+        # Watches whose URL currently matches this tag's pattern
+        matching_watches = {
+            w_uuid: watch
+            for w_uuid, watch in datastore.data['watching'].items()
+            if default.matches_url(watch.get('url', ''))
+        }
+
        output = render_template("edit-tag.html",
                                 extra_form_content=included_content,
                                 extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
+                                 matching_watches=matching_watches,
                                 settings_application=datastore.data['settings']['application'],
                                 **template_args
                                 )
@@ -2,20 +2,31 @@ from wtforms import (
    Form,
    StringField,
    SubmitField,
+    TextAreaField,
    validators,
 )
 from wtforms.fields.simple import BooleanField
+from flask_babel import lazy_gettext as _l

 from changedetectionio.processors.restock_diff.forms import processor_settings_form as restock_settings_form
+from changedetectionio.llm.ui_strings import LLM_INTENT_TAG_PLACEHOLDER
+from changedetectionio.llm.evaluator import DEFAULT_CHANGE_SUMMARY_PROMPT

 class group_restock_settings_form(restock_settings_form):
+    """Restock settings form extended with tag/group-level fields: the per-watch
+    activation switch, URL auto-match pattern, tag colour and the two AI prompts."""
-    overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False)
+    overrides_watch = BooleanField(_l('Activate for individual watches in this tag/group?'), default=False)
+    url_match_pattern = StringField(_l('Auto-apply to watches with URLs matching'),
+                                    render_kw={"placeholder": _l("e.g. *://example.com/* or github.com/myorg")})
+    # NOTE(review): no format validation on the colour value; the groups overview
+    # template appears to write it into a <style> block - consider restricting it
+    # to '#rrggbb' with validators.Regexp once stored values are confirmed to comply.
+    tag_colour = StringField(_l('Tag colour'), default='')
+    # Labels go through lazy_gettext like every other label in this form so they
+    # are picked up for translation.
+    llm_intent = TextAreaField(_l('AI Change Intent'),
+                               validators=[validators.Optional(), validators.Length(max=2000)],
+                               render_kw={"rows": "5", "placeholder": LLM_INTENT_TAG_PLACEHOLDER})
+
+    llm_change_summary = TextAreaField(_l('AI Change Summary'),
+                               validators=[validators.Optional(), validators.Length(max=2000)],
+                               render_kw={"rows": "5", "placeholder": DEFAULT_CHANGE_SUMMARY_PROMPT},
+                               default='')

 class SingleTag(Form):
+    """Form for one tag: a required name field and the save button."""
 
-    name = StringField('Tag name', [validators.InputRequired()], render_kw={"placeholder": "Name"})
-    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
-
-
-
-
+    name = StringField(_l('Tag name'), [validators.InputRequired()], render_kw={"placeholder": _l("Name")})
+    save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})
@@ -17,6 +17,8 @@

 </script>

+<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
+<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>

@@ -25,6 +27,9 @@
    <div class="tabs collapsable">
        <ul>
            <li class="tab" id=""><a href="#general">{{ _('General') }}</a></li>
+            {% if llm_configured %}
+            <li class="tab"><a href="#ai-llm">{{ _('AI / LLM') }}</a></li>
+            {% endif %}
            <li class="tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
            {% if extra_tab_content %}
            <li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
@@ -43,11 +48,59 @@
                    <div class="pure-control-group">
                        {{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
                    </div>
+                    <div class="pure-control-group">
+                        {{ render_field(form.url_match_pattern, class="m-d") }}
+                        <span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
+                    </div>
+                    {% if matching_watches %}
+                    <div class="pure-control-group">
+                        <label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
+                        <ul class="tag-url-match-list">
+                            {% for w_uuid, w in matching_watches.items() %}
+                            <li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
+                            {% endfor %}
+                        </ul>
+                    </div>
+                    {% endif %}
+                    <div class="pure-control-group">
+                        <label>{{ _('Tag colour') }}</label>
+                        <div style="display:flex; align-items:center; gap:0.75em;">
+                            <input type="checkbox" id="use_custom_colour"
+                                   {% if data.get('tag_colour') %}checked{% endif %}>
+                            <label for="use_custom_colour" style="margin:0">{{ _('Custom colour') }}</label>
+                            <input type="color" id="tag_colour_picker"
+                                   value="{{ data.get('tag_colour') or '#4f8ef7' }}"
+                                   {% if not data.get('tag_colour') %}disabled{% endif %}>
+                            <input type="hidden" name="tag_colour" id="tag_colour_hidden"
+                                   value="{{ data.get('tag_colour', '') }}">
+                        </div>
+                        <span class="pure-form-message-inline">{{ _('Leave unchecked to use the auto-generated colour based on the tag name.') }}</span>
+                    </div>
+                    <script>
+                    // Keep the hidden "tag_colour" input (the only one of the three
+                    // controls that carries a name, so the one that is submitted) in
+                    // sync with the checkbox and the colour picker.
+                    (function () {
+                        var cb = document.getElementById('use_custom_colour');
+                        var picker = document.getElementById('tag_colour_picker');
+                        var hidden = document.getElementById('tag_colour_hidden');
+                        // Picker changed: mirror the chosen colour into the hidden field.
+                        picker.addEventListener('input', function () { hidden.value = this.value; });
+                        cb.addEventListener('change', function () {
+                            // Unchecked: disable the picker and submit an empty colour;
+                            // checked: enable it and submit whatever it currently shows.
+                            picker.disabled = !this.checked;
+                            hidden.value = this.checked ? picker.value : '';
+                        });
+                    })();
+                    </script>
                </fieldset>
            </div>

+            {% if llm_configured %}
+            <div class="tab-pane-inner" id="ai-llm">
+                {% include "edit/include_llm_intent.html" %}
+            </div>
+            {% endif %}
+
            <div class="tab-pane-inner" id="filters-and-triggers">
-                <p>{{ _('These settings are') }} <strong><i>{{ _('added') }}</i></strong> {{ _('to any existing watch configurations.') }}</p>
+                {# TRANSLATORS: CJK fonts lack native italics; allow substitution with conventional local styling. dennis-ignore: W303 #}
+                <p>{{ _('These settings are <strong><i>added</i></strong> to any existing watch configurations.')|safe }}</p>
+
                {% include "edit/include_subtract.html" %}
                <div class="text-filtering border-fieldset">
                    <h3>{{ _('Text filtering') }}</h3>
@@ -78,7 +131,7 @@
                        {% if has_default_notification_urls %}
                        <div class="inline-warning">
                            <img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Look out!') }}" title="{{ _('Lookout!') }}" >
-                            {{ _('There are') }} <a href="{{ url_for('settings.settings_page')}}#notifications">{{ _('system-wide notification URLs enabled') }}</a>, {{ _('this form will override notification settings for this watch only') }} &dash; {{ _('an empty Notification URL list here will still send notifications.') }}
+                            {{ _('There are <a href="%(url)s">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only &dash; an empty Notification URL list here will still send notifications.', url=url_for('settings.settings_page') ~ '#notifications')|safe }}
                        </div>
                        {% endif %}
                        <a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">{{ _('Use system defaults') }}</a>
@@ -3,6 +3,26 @@
 {% from '_helpers.html' import render_simple_field, render_field %}
 <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
 <script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
+<style>
+{%- for uuid, tag in available_tags -%}
+{%- if tag and tag.title -%}
+{%- set class_name = tag.title|sanitize_tag_class -%}
+{%- if tag.get('tag_colour') -%}
+.watch-tag-list.tag-{{ class_name }} { background-color: {{ tag.tag_colour }}; color: {{ wcag_text_color(tag.tag_colour) }}; }
+{%- else -%}
+{%- set colors = generate_tag_colors(tag.title) -%}
+.watch-tag-list.tag-{{ class_name }} {
+  background-color: {{ colors['light']['bg'] }};
+  color: {{ colors['light']['color'] }};
+}
+html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
+  background-color: {{ colors['dark']['bg'] }};
+  color: {{ colors['dark']['color'] }};
+}
+{%- endif -%}
+{%- endif -%}
+{%- endfor -%}
+</style>

 <div class="box">
    <form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
@@ -45,10 +65,10 @@
            {% for uuid, tag in available_tags  %}
            <tr id="{{ uuid }}" class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}">
                <td class="watch-controls">
-                    <a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
+                    <a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('Mute notifications') }}" title="{{ _('Mute notifications') }}" class="icon icon-mute" ></a>
                </td>
                <td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
-                <td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
+                <td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td>
                <td>
                    <a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
                    <a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
@@ -307,8 +307,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
                # Provide feedback about skipped watches
                skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
                if skipped_count > 0:
-                    flash(gettext("Queued {} watches for rechecking ({} already queued or running).").format(
-                        len(watches_to_queue_filtered), skipped_count))
+                    flash(gettext("Queued {count} watches for rechecking ({skipped_count} already queued or running).").format(
+                        count=len(watches_to_queue_filtered), skipped_count=skipped_count))
                else:
                    if len(watches_to_queue_filtered) == 1:
                        flash(gettext("Queued 1 watch for rechecking."))
@@ -17,6 +17,34 @@ from changedetectionio.store import ChangeDetectionStore
 from changedetectionio.auth_decorator import login_optionally_required


+def _clean_litellm_error(exc) -> str:
+    """Return a short, human-readable error string from a litellm exception.
+
+    litellm embeds the raw provider JSON in str(exc), which can be hundreds of
+    characters of verbose quota detail.  We try to pull just the provider's
+    error message out of that JSON; failing that we return the first line with
+    the 'litellm.XxxError:' class prefix and 'XxxException - ' marker stripped.
+
+    Args:
+        exc: Any exception (or object) whose str() holds the error text.
+
+    Returns:
+        The first sentence of the provider message, ending in exactly one
+        sentence terminator, or the cleaned-up first line of str(exc).
+    """
+    import json, re
+    raw = str(exc)
+    # Try to parse the embedded JSON block (starts at first '{')
+    brace = raw.find('{')
+    if brace >= 0:
+        try:
+            # raw_decode() reads one JSON value and ignores whatever follows it,
+            # so trailing text after the provider payload no longer defeats parsing.
+            payload, _end = json.JSONDecoder().raw_decode(raw[brace:])
+            error = payload.get('error') if isinstance(payload, dict) else None
+            if isinstance(error, dict):
+                msg = error.get('message')
+            else:
+                # Some payloads carry the message directly: {"error": "text"}
+                msg = error
+            if isinstance(msg, str) and msg:
+                # Take only the first sentence / line — provider messages can be long
+                first = msg.split('\n')[0].split('. ')[0].strip()
+                if first:
+                    # Do not double up punctuation ("Quota exceeded." stays as is).
+                    return first if first[-1] in '.!?' else first + '.'
+        except Exception:
+            # Best effort only: this runs inside error handling and must not raise.
+            pass
+    # Fallback: strip the "litellm.XxxError: litellm.XxxError: providerException - " prefix
+    first_line = raw.split('\n')[0]
+    first_line = re.sub(r'^(litellm\.\w+:\s*)+', '', first_line)
+    first_line = re.sub(r'\w+Exception\s*-\s*', '', first_line).strip()
+    return first_line or raw.split('\n')[0]
+
+
 def construct_blueprint(datastore: ChangeDetectionStore):
    diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")

@@ -128,6 +156,175 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            redirect=redirect
        )

+    @diff_blueprint.route("/diff/<uuid_str:uuid>/llm-summary/prompt", methods=['GET'])
+    @login_optionally_required
+    def diff_llm_summary_prompt(uuid):
+        """Return the effective LLM summary prompt for a watch immediately (no LLM call).
+
+        Responds with JSON {"prompt": "..."}; an unknown watch yields an empty prompt
+        with status 404. If the prompt cannot be resolved the failure is logged and an
+        empty prompt is returned with status 200.
+        """
+        from flask import jsonify
+        watch = datastore.data['watching'].get(uuid)
+        if not watch:
+            return jsonify({'prompt': ''}), 404
+        try:
+            from changedetectionio.llm.evaluator import get_effective_summary_prompt
+            prompt = get_effective_summary_prompt(watch, datastore)
+        except Exception as e:
+            # Still answer with an empty prompt, but leave a trace instead of
+            # hiding the failure completely.
+            logger.warning(f"Could not resolve LLM summary prompt for {uuid}: {e}")
+            prompt = ''
+        return jsonify({'prompt': prompt})
+
+    @diff_blueprint.route("/diff/<uuid_str:uuid>/llm-summary", methods=['GET'])
+    @login_optionally_required
+    def diff_llm_summary(uuid):
+        """
+        Generate (or return cached) an AI summary of the diff between two snapshots.
+        Called via AJAX from the diff page when no cached summary exists.
+        Returns JSON: {"summary": "...", "error": null} or {"summary": null, "error": "..."}
+
+        Query params: from_version / to_version (history timestamps) plus the diff
+        preference flags parsed by DiffPrefs.from_request_args.
+
+        Status codes: 404 unknown watch; 400 LLM not configured, fewer than two
+        history entries, or input too large; 429 monthly token budget reached;
+        500 snapshot read or LLM call failure; 200 otherwise (including the
+        "No differences found" and "LLM returned empty summary" error bodies).
+        """
+        import difflib
+        import time
+        from flask import jsonify
+
+        try:
+            watch = datastore.data['watching'][uuid]
+        except KeyError:
+            return jsonify({'summary': None, 'error': 'Watch not found'}), 404
+
+        llm_cfg = datastore.data.get('settings', {}).get('application', {}).get('llm', {})
+        if not llm_cfg.get('model'):
+            return jsonify({'summary': None, 'error': 'LLM not configured'}), 400
+
+        dates = list(watch.history.keys())
+        if len(dates) < 2:
+            return jsonify({'summary': None, 'error': 'Not enough history'}), 400
+
+        best_from = watch.get_from_version_based_on_last_viewed
+        from_version      = request.args.get('from_version', best_from if best_from else dates[-2])
+        to_version        = request.args.get('to_version', dates[-1])
+        from changedetectionio.llm.evaluator import DiffPrefs
+        prefs             = DiffPrefs.from_request_args(request.args)
+        all_changes       = prefs.all_changes
+        ignore_whitespace = prefs.ignore_whitespace
+        show_removed      = prefs.show_removed
+        show_added        = prefs.show_added
+
+        def _prep(text):
+            """Split into lines, optionally normalising whitespace on each line.
+
+            A missing snapshot (None) is treated as empty text so both diff paths
+            behave the same instead of one of them raising AttributeError.
+            """
+            lines = (text or '').splitlines()
+            if not ignore_whitespace:
+                return lines
+            return [' '.join(line.split()) for line in lines]
+
+        def _make_unified_diff(a_text, b_text):
+            """Unified diff of two texts with the two '---'/'+++' header lines dropped."""
+            lines = list(difflib.unified_diff(_prep(a_text), _prep(b_text), lineterm='', n=3))
+            return '\n'.join(lines[2:]) if len(lines) > 2 else '\n'.join(lines)
+
+        def _apply_filters(diff_text):
+            """Strip +/- lines the user has hidden in the UI so the LLM matches what they see."""
+            if show_removed and show_added:
+                return diff_text
+            out = []
+            for line in diff_text.splitlines():
+                if line.startswith('-') and not show_removed:
+                    continue
+                if line.startswith('+') and not show_added:
+                    continue
+                out.append(line)
+            return '\n'.join(out)
+
+        try:
+            from_text = watch.get_history_snapshot(timestamp=from_version)
+            to_text = watch.get_history_snapshot(timestamp=to_version)
+        except Exception as e:
+            return jsonify({'summary': None, 'error': f'Could not read snapshots: {e}'}), 500
+
+        if all_changes:
+            # Build sequential diffs for every intermediate snapshot between from and to
+            # so the LLM sees the full timeline of changes, not just start→end
+            sorted_dates = sorted(dates)
+            try:
+                start_idx = sorted_dates.index(from_version)
+                end_idx   = sorted_dates.index(to_version)
+            except ValueError:
+                # Unknown version requested: fall back to the whole history.
+                start_idx, end_idx = 0, len(sorted_dates) - 1
+
+            steps = sorted_dates[start_idx:end_idx + 1]
+            segments = []
+            for i in range(len(steps) - 1):
+                a_ts, b_ts = steps[i], steps[i + 1]
+                try:
+                    a_text = watch.get_history_snapshot(timestamp=a_ts) or ''
+                    b_text = watch.get_history_snapshot(timestamp=b_ts) or ''
+                except Exception:
+                    # An unreadable intermediate snapshot only drops its own segment.
+                    continue
+                seg = _apply_filters(_make_unified_diff(a_text, b_text))
+                if seg.strip():
+                    segments.append(f'=== {a_ts} → {b_ts} ===\n{seg}')
+
+            diff_text = '\n\n'.join(segments) if segments else ''
+        else:
+            diff_text = _apply_filters(_make_unified_diff(from_text, to_text))
+
+        if not diff_text.strip():
+            return jsonify({'summary': None, 'error': 'No differences found'})
+
+        from changedetectionio.llm.evaluator import (
+            summarise_change, get_effective_summary_prompt, build_summary_cache_prompt,
+            is_global_token_budget_exceeded, get_global_token_budget_month,
+            LLMInputTooLargeError,
+        )
+
+        # Diff-pref flags + system prompt + active model are part of the cache key
+        # so prompt or model changes bust the cache.
+        from changedetectionio.llm.evaluator import get_llm_settings
+        _ls = get_llm_settings(datastore)
+        _max_summary_tokens = _ls.max_summary_tokens
+        _llm_model = _ls.model
+        cache_prompt = build_summary_cache_prompt(
+            effective_prompt=get_effective_summary_prompt(watch, datastore),
+            max_summary_tokens=_max_summary_tokens,
+            prefs=prefs,
+            model=_llm_model,
+        )
+
+        # Check cache — keyed by version pair + prompt hash (invalidates if prompt changes)
+        cached = watch.get_llm_diff_summary(from_version, to_version, prompt=cache_prompt)
+        if cached:
+            datastore.set_last_viewed(uuid, int(time.time()))
+            return jsonify({'summary': cached, 'error': None, 'cached': True})
+
+        # Check global monthly token budget before making an LLM call
+        if is_global_token_budget_exceeded(datastore):
+            budget = get_global_token_budget_month(datastore)
+            llm_cfg = datastore.data.get('settings', {}).get('application', {}).get('llm', {})
+            used = llm_cfg.get('tokens_this_month', 0)
+            return jsonify({
+                'summary': None,
+                'error': gettext(
+                    'Monthly AI token budget of %(budget)s tokens reached (%(used)s used). Resets next month.',
+                    budget=f'{budget:,}',
+                    used=f'{used:,}',
+                ),
+                'budget_exceeded': True,
+            }), 429
+
+        try:
+            summary = summarise_change(watch, datastore, diff=diff_text, current_snapshot=to_text)
+        except LLMInputTooLargeError as e:
+            return jsonify({'summary': None, 'error': str(e)}), 400
+        except Exception as e:
+            logger.error(f"LLM summary generation failed for {uuid}: {e}")
+            return jsonify({'summary': None, 'error': _clean_litellm_error(e)}), 500
+
+        if not summary:
+            return jsonify({'summary': None, 'error': 'LLM returned empty summary'})
+
+        try:
+            watch.save_llm_diff_summary(summary, from_version, to_version, prompt=cache_prompt)
+        except Exception as e:
+            # Caching is best effort; the freshly generated summary is still returned.
+            logger.warning(f"Could not cache llm summary for {uuid}: {e}")
+
+        datastore.set_last_viewed(uuid, int(time.time()))
+        return jsonify({'summary': summary, 'error': None, 'cached': False})
+
    @diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['GET'])
    @login_optionally_required
    def diff_history_page_extract_GET(uuid):
@@ -238,6 +435,47 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            redirect=redirect
        )

+    @diff_blueprint.route("/diff/<uuid_str:uuid>/download-patch", methods=['GET'])
+    @login_optionally_required
+    def download_patch(uuid):
+        """
+        Generate and return a unified diff patch between two snapshots.
+        Query params: from_version, to_version (timestamp strings from watch history);
+        they default to the second-to-last and the last history entry.
+
+        Returns a text/plain response holding the patch, or '(no differences)' when
+        the two snapshots are identical. Responds 404 for an unknown watch, 400 with
+        fewer than two history entries and 500 when a snapshot cannot be read.
+        """
+        import difflib
+
+        try:
+            watch = datastore.data['watching'][uuid]
+        except KeyError:
+            return make_response('Watch not found', 404)
+
+        dates = list(watch.history.keys())
+        if len(dates) < 2:
+            return make_response('Not enough history', 400)
+
+        from_version = request.args.get('from_version', dates[-2])
+        to_version   = request.args.get('to_version',   dates[-1])
+
+        try:
+            # A missing snapshot (None) is diffed as empty text rather than crashing.
+            from_text = watch.get_history_snapshot(timestamp=from_version) or ''
+            to_text   = watch.get_history_snapshot(timestamp=to_version) or ''
+        except Exception as e:
+            return make_response(f'Could not read snapshots: {e}', 500)
+
+        # Split WITHOUT line endings and keep lineterm='' so every emitted line -
+        # the '---'/'+++'/'@@' headers as well as the content lines - is
+        # terminator-free, then join with '\n'. Mixing keepends=True with
+        # lineterm='' glued the header lines together on one line and merged a
+        # final line lacking a newline into the next diff line.
+        diff_lines = list(difflib.unified_diff(
+            from_text.splitlines(),
+            to_text.splitlines(),
+            fromfile=f'snapshot-{from_version}',
+            tofile=f'snapshot-{to_version}',
+            lineterm='',
+        ))
+        patch_text = '\n'.join(diff_lines) + '\n' if diff_lines else '(no differences)\n'
+
+        response = make_response(patch_text)
+        response.headers['Content-Type'] = 'text/plain; charset=utf-8'
+        return response
+
    @diff_blueprint.route("/diff/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
    @login_optionally_required
    def processor_asset(uuid, asset_name):
@@ -10,10 +10,32 @@ from changedetectionio.store import ChangeDetectionStore
 from changedetectionio.auth_decorator import login_optionally_required
 from changedetectionio.time_handler import is_within_schedule
 from changedetectionio import worker_pool
+from changedetectionio.llm.evaluator import get_llm_config as _get_llm_config

 def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
    edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
-    
+
+    def _resolve_llm_group_overrides(watch, datastore) -> dict:
+        """
+        Work out, per LLM field (llm_intent, llm_change_summary), whether a linked
+        tag supplies the value because the watch itself has none.
+
+        The result maps each field name to None when the watch has its own
+        non-blank value or no linked tag provides one, otherwise to
+        {'value': <stripped tag text>, 'group_name': <tag title, default 'tag'>}
+        taken from the first of the watch's tags with a non-blank value.
+        """
+        llm_fields = ('llm_intent', 'llm_change_summary')
+        overrides = dict.fromkeys(llm_fields)
+
+        for name in llm_fields:
+            own_value = (watch.get(name) or '').strip()
+            if own_value:
+                # The watch's own text wins; nothing to inherit for this field.
+                continue
+
+            for tag_uuid in watch.get('tags', []):
+                known_tags = datastore.data['settings']['application'].get('tags', {})
+                tag = known_tags.get(tag_uuid)
+                if not tag:
+                    continue
+                inherited = (tag.get(name) or '').strip()
+                if not inherited:
+                    continue
+                overrides[name] = {
+                    'value': inherited,
+                    'group_name': tag.get('title', 'tag'),
+                }
+                break  # first tag carrying a value decides
+
+        return overrides
+
    def _watch_has_tag_options_set(watch):
        """This should be fixed better so that Tag is some proper Model, a tag is just a Watch also"""
        for tag_uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
@@ -67,10 +89,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                default['proxy'] = ''
        # proxy_override set to the json/text list of the items

-        # browser_profile: None means "use system default" — map to 'system' so the radio pre-selects correctly
-        if not default.get('browser_profile'):
-            default['browser_profile'] = 'system'
-
        # Does it use some custom form? does one exist?
        processor_name = datastore.data['watching'][uuid].get('processor', '')
        processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
@@ -143,37 +161,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            except Exception as e:
                logger.warning(f"Failed to load processor config: {e}")

-        from changedetectionio.model.browser_profile import BrowserProfile
-        from changedetectionio import content_fetchers as cf
-        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
+        for p in datastore.extra_browsers:
+            form.fetch_backend.choices.append(p)

-        # Resolve the name of the system-level default profile for the label
-        from changedetectionio.model.browser_profile import get_profile
-        _system_default_machine_name = datastore.data['settings']['application'].get('browser_profile') or 'direct_http_requests'
-        _all_store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
-        _default_profile = get_profile(_system_default_machine_name, _all_store_profiles)
-        if _default_profile:
-            _system_label = gettext('System settings default') + ' \u2013 ' + _default_profile.name
-        else:
-            _system_label = gettext('System settings default')
-
-        # Choices: system default + always-present defaults (requests) + user-created profiles
-        form.browser_profile.choices = [('system', _system_label)] + [
-            (p.get_machine_name(), p.name)
-            for p in cf.DEFAULT_BROWSER_PROFILES.values()
-        ] + [
-            (machine_name, raw.get('name', machine_name) if isinstance(raw, dict) else getattr(raw, 'name', machine_name))
-            for machine_name, raw in store_profiles.items()
-        ]
-
-        # Build a map of machine_name → fetcher class name for the JS visibility system
-        all_profiles = dict(cf.DEFAULT_BROWSER_PROFILES)
-        for machine_name, raw in store_profiles.items():
-            try:
-                all_profiles[machine_name] = BrowserProfile(**raw) if isinstance(raw, dict) else raw
-            except Exception:
-                pass
-        browser_profile_fetchers = {mn: p.get_fetcher_class_name() for mn, p in all_profiles.items()}
+        form.fetch_backend.choices.append(("system", gettext('System settings default')))

        # form.browser_steps[0] can be assumed that we 'goto url' first

@@ -181,7 +172,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
            del form.proxy
        else:
-            form.proxy.choices = [('', 'Default')]
+            form.proxy.choices = [('', gettext('Default'))]
            for p in datastore.proxy_list:
                form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

@@ -241,7 +232,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe

            # Recast it if need be to right data Watch handler
            watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
-            datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore, default=datastore.data['watching'][uuid])
+            datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore.data, default=datastore.data['watching'][uuid])

            # Save the watch immediately
            datastore.data['watching'][uuid].commit()
@@ -327,13 +318,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            template_args = {
                'available_processors': processors.available_processors(),
                'available_timezones': sorted(available_timezones()),
-                'browser_profile_fetchers': browser_profile_fetchers,
                'browser_steps_config': browser_step_ui_config,
                'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
                'extra_classes': ' '.join(c),
                'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
                'extra_processor_config': form.extra_tab_content(),
-                'extra_title': f" - Edit - {watch.label}",
+                'extra_title': f" - {gettext('Edit')} - {watch.label}",
                'form': form,
                'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
                'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
@@ -352,7 +342,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                'using_global_webdriver_wait': not default['webdriver_delay'],
                'uuid': uuid,
                'watch': watch,
-                'capabilities': capabilities
+                'capabilities': capabilities,
+                'auto_applied_tags': {
+                    tag_uuid: tag
+                    for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
+                    if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
+                },
+                # LLM intent context
+                'llm_configured': bool(_get_llm_config(datastore)),
+                'llm_group_overrides': _resolve_llm_group_overrides(watch, datastore),
            }

            included_content = None
@@ -61,7 +61,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        timestamp = None

        extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
-        fetcher_supports_screenshots = watch.fetcher_supports_screenshots
+        is_html_webdriver = watch.fetcher_supports_screenshots

        triggered_line_numbers = []
        ignored_line_numbers = []
@@ -107,12 +107,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                 current_diff_url=watch['url'],
                                 current_version=timestamp,
                                 extra_stylesheets=extra_stylesheets,
-                                 extra_title=f" - Diff - {watch.label} @ {timestamp}",
+                                 extra_title=f" - {gettext('Diff')} - {watch.label} @ {timestamp}",
                                 highlight_ignored_line_numbers=ignored_line_numbers,
                                 highlight_triggered_line_numbers=triggered_line_numbers,
                                 highlight_blocked_line_numbers=blocked_line_numbers,
                                 history_n=watch.history_n,
-                                 fetcher_supports_screenshots=fetcher_supports_screenshots,
+                                 is_html_webdriver=is_html_webdriver,
                                 last_error=watch['last_error'],
                                 last_error_screenshot=watch.get_error_snapshot(),
                                 last_error_text=watch.get_error_text(),
@@ -25,7 +25,8 @@
        <fieldset class="diff-fieldset">
            {% if versions|length >= 1 %}
                <span style="white-space: nowrap;">
-                <label id="change-from" for="diff-from-version" class="from-to-label">{{ _('From') }}</label>
+                {# TRANSLATORS: 'From' labels the older snapshot version selector on the diff page #}
+                <label id="change-from" for="diff-from-version" class="from-to-label">{{ pgettext('diff version', 'From') }}</label>
                <select id="diff-from-version" name="from_version" class="needs-localtime">
                    {%- for version in versions|reverse -%}
                        <option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
@@ -35,7 +36,8 @@
                </select>
                </span>
                <span style="white-space: nowrap;">
-                <label id="change-to" for="diff-to-version" class="from-to-label">{{ _('To') }}</label>
+                {# TRANSLATORS: 'To' labels the newer snapshot version selector on the diff page #}
+                <label id="change-to" for="diff-to-version" class="from-to-label">{{ pgettext('diff version', 'To') }}</label>
                <select id="diff-to-version" name="to_version" class="needs-localtime">
                    {%- for version in versions|reverse -%}
                        <option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
@@ -76,6 +78,12 @@
                <label for="replaced" class="pure-checkbox" id="label-diff-replaced">
                <input type="checkbox" id="replaced"  name="replaced" {% if diff_prefs.replaced %}checked=""{% endif %}> {{ _('Replaced') }}</label>
            </span>
+            {%- if llm_configured -%}
+            <span>
+                <label for="llm_all_changes" class="pure-checkbox" id="label-diff-llm-all-changes" title="{{ _('Include all intermediate snapshots between the selected versions in the AI summary') }}">
+                <input type="checkbox" id="llm_all_changes" name="llm_all_changes" {% if diff_prefs.llm_all_changes %}checked=""{% endif %}> &#x2728; {{ _('AI: every change between versions') }}</label>
+            </span>
+            {%- endif -%}
        </fieldset>
        {%- if versions|length >= 2 -%}
            <div id="keyboard-nav">
@@ -124,9 +132,22 @@
        </div>
    {%- endif -%}
        {%- if password_enabled_and_share_is_off -%}
-            <div class="tip">{{ _('Pro-tip: You can enable') }} <strong>{{ _('"share access when password is enabled"') }}</strong> {{ _('from settings.') }}
+            <div class="tip">{{ _('Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings.')|safe }}
            </div>
        {%- endif -%}
+        {%- if llm_configured -%}
+        <div id="llm-diff-summary-area"{% if not llm_diff_summary %} data-pending="1"{% endif %}>
+            <span class="llm-diff-summary-label">&#x2728; {{ _('AI Change Summary') }}</span>
+            {%- if llm_diff_summary -%}
+            <p class="llm-diff-summary-text">{{ llm_diff_summary }}</p>
+            {%- else -%}
+            <p class="llm-diff-summary-text llm-diff-summary-loading">{{ _('Generating summary…') }}</p>
+            {%- if llm_summary_prompt -%}
+            <p class="llm-diff-summary-prompt"><span class="llm-diff-summary-prompt-text">{{ llm_summary_prompt }}</span></p>
+            {%- endif -%}
+            {%- endif -%}
+        </div>
+        {%- endif -%}
        <div id="text-diff-heading-area"  style="user-select: none;">
            <div class="snapshot-age"><span>{{ from_version|format_timestamp_timeago }}</span>
                {%- if note -%}<span class="note"><strong>{{ note }}</strong></span>{%- endif -%}
@@ -136,6 +157,7 @@
        <pre id="difference" style="border-left: 2px solid #ddd;">{{ content| diff_unescape_difference_spans }}</pre>
    <div id="diff-visualiser-area-after" style="user-select: none;">
        <strong>{{ _('Tip:') }}</strong> {{ _('Highlight text to share or add to ignore lists.') }}
+        &nbsp;&mdash;&nbsp;<a href="{{ url_for('ui.ui_diff.download_patch', uuid=uuid, from_version=from_version, to_version=to_version) }}" target="_blank" rel="noopener" style="font-size:0.85em;">{{ _('Download difference patch') }}</a>
    </div>
    </div>

@@ -143,7 +165,7 @@
         <div class="tip">
             {{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
         </div>
-         {% if fetcher_supports_screenshots %}
+         {% if is_html_webdriver %}
           {% if screenshot %}
            <div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
            <img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}" >
@@ -162,5 +184,58 @@
 </script>
 <script src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>

+{% if llm_configured %}
+<script>
+$(function () {  // on DOM ready: fetch the AI change summary via JSON when the area is marked pending
+    var $area = $('#llm-diff-summary-area');
+    if (!$area.length || !$area.data('pending')) return;  // area absent or not flagged data-pending: nothing to fetch
+
+    var fromVersion = $('#diff-from-version').val();
+    var toVersion   = $('#diff-to-version').val();
+    var summaryUrl  = "{{ url_for('ui.ui_diff.diff_llm_summary', uuid=uuid) }}";
+
+    function showLlmError(msg) {  // swap the loading text for msg, style it as an error, drop the pending attribute
+        $area.find('.llm-diff-summary-text')
+             .removeClass('llm-diff-summary-loading')
+             .addClass('llm-error')
+             .text(msg);
+        $area.removeAttr('data-pending');
+    }
+
+    var llmAllChanges      = $('#llm_all_changes').is(':checked')    ? 1 : 0;  // checkbox states are sent as 1/0
+    var ignoreWhitespace   = $('#ignoreWhitespace').is(':checked')   ? 1 : 0;
+    var showRemoved        = $('#removed').is(':checked')            ? 1 : 0;
+    var showAdded          = $('#added').is(':checked')              ? 1 : 0;
+    $.getJSON(summaryUrl, {
+        from_version:      fromVersion,
+        to_version:        toVersion,
+        all_changes:       llmAllChanges,
+        ignore_whitespace: ignoreWhitespace,
+        removed:           showRemoved,
+        added:             showAdded,
+    })
+        .done(function (data) {
+            if (data.summary) {
+                $area.find('.llm-diff-summary-text')
+                     .removeClass('llm-diff-summary-loading')
+                     .text(data.summary);  // .text() inserts the summary as plain text, not HTML
+                $area.removeAttr('data-pending');
+            } else if (data.error) {
+                showLlmError(data.error);
+            } else {
+                $area.remove();  // response has neither summary nor error: remove the whole summary area
+            }
+        })
+        .fail(function (xhr) {
+            var resp = xhr.responseJSON;
+            if (resp && resp.error) {
+                showLlmError(resp.error);  // prefer the error message from the JSON error body
+            } else {
+                showLlmError('AI summary request failed (HTTP ' + xhr.status + ').');  // generic fallback with the HTTP status
+            }
+        });
+});
+</script>
+{% endif %}

 {% endblock %}
@@ -1,6 +1,6 @@
 {% extends 'base.html' %}
 {% block content %}
-{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
+{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
 {% from '_common_fields.html' import render_common_settings_form %}
 <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -27,8 +27,7 @@
    const proxy_recheck_status_url="{{url_for('check_proxies.get_recheck_status', uuid=uuid)}}";
    const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
    const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
-    const default_system_fetch_backend = {{ (browser_profile_fetchers.get(settings_application.get('browser_profile') or 'direct_http_requests', 'requests')) | tojson }};
-    const browserProfileFetcherMap = {{ browser_profile_fetchers | tojson }};
+    const default_system_fetch_backend="{{ settings_application['fetch_backend'] }}";
 </script>
 <script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
@@ -58,6 +57,9 @@
            {% if capabilities.supports_visual_selector %}
            <li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
            {% endif %}
+            {% if not llm_features_disabled %}
+            <li class="tab"><a href="#ai-llm">{{ _('AI / LLM') }}</a></li>
+            {% endif %}
            {% if capabilities.supports_text_filters_and_triggers %}
            <li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
            <li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
@@ -82,6 +84,14 @@
                    <div class="pure-control-group">
                        {{ render_field(form.tags) }}
                        <span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
+                        {% if auto_applied_tags %}
+                        <span class="pure-form-message-inline">
+                            {{ _('Also automatically applied by URL pattern:') }}
+                            {% for tag_uuid, tag in auto_applied_tags.items() %}
+                            <a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
+                            {% endfor %}
+                        </span>
+                        {% endif %}
                    </div>
                    <div class="pure-control-group inline-radio">
                        {{ render_field(form.processor) }}
@@ -132,19 +142,11 @@
            {% if capabilities.supports_request_type %}
            <div class="tab-pane-inner" id="request">
                    <div class="pure-control-group inline-radio">
-                        <div><label for="browser_profile">{{ form.browser_profile.label.text }}</label></div>
-                        <div><ul class="fetch-backend" id="browser_profile">
-                        {%- for subfield in form.browser_profile %}
-                            <li>
-                                {{ subfield() }}
-                                {{ browser_profile_fetchers.get(subfield.data, '')|fetcher_status_icons }}
-                                <label for="{{ subfield.id }}">{{ subfield.label.text }}</label>
-                            </li>
-                        {%- endfor %}
-                        </ul></div>
+                        {{ render_field(form.fetch_backend, class="fetch-backend") }}
                        <span class="pure-form-message-inline">
-                            <p>{{ _('Choose how this watch fetches its target URL. \'System settings default\' inherits the global setting.') }}</p>
-                            <p>{{ _('Manage browser profiles in') }} <a href="{{ url_for('settings.settings_browsers.index') }}">{{ _('Settings → Browsers') }}</a>.</p>
+                            <p>{{ _('Use the <strong>Basic</strong> method (default) where your watched sites don\'t need Javascript to render.')|safe }}</p>
+                            <p>{{ _('The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.')|safe }}</p>
+                            {{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
                        </span>
                    </div>
                {% if form.proxy %}
@@ -158,13 +160,13 @@
                {% endif %}

                <!-- webdriver always -->
-                <fieldset data-visible-for="fetch_backend=playwright fetch_backend=selenium fetch_backend=puppeteer fetch_backend=cloakbrowser"  style="display: none;">
+                <fieldset data-visible-for="fetch_backend=html_webdriver"  style="display: none;">
                    <div class="pure-control-group">
                        {{ render_field(form.webdriver_delay) }}
                        <div class="pure-form-message-inline">
                            <strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
                            <br>
-                            {{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
+                            {{ _('This will wait <i>n</i> seconds before extracting the text.')|safe }}
                            {% if using_global_webdriver_wait %}
                            <br><strong>{{ _('Using the current global default settings') }}</strong>
                            {% endif %}
@@ -181,8 +183,8 @@
                        </div>
                    </div>
                </fieldset>
-                <!-- requests always -->
-                <fieldset data-visible-for="fetch_backend=requests">
+                <!-- html requests always -->
+                <fieldset data-visible-for="fetch_backend=html_requests">
                    <div class="pure-control-group">
                        <a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a>
                    </div>
@@ -219,7 +221,7 @@ Math: {{ 1 + 1 }}") }}
                            ({{ _('Not supported by Selenium browser') }})
                        </div>
                    </div>
-            <fieldset data-visible-for="fetch_backend=requests fetch_backend=playwright fetch_backend=selenium fetch_backend=puppeteer fetch_backend=cloakbrowser" >
+            <fieldset data-visible-for="fetch_backend=html_requests fetch_backend=html_webdriver" >
                    <div class="pure-control-group inline-radio advanced-options"  style="display: none;">
                    {{ render_checkbox_field(form.ignore_status_codes) }}
                    </div>
@@ -297,7 +299,7 @@ Math: {{ 1 + 1 }}") }}
                        {% if has_default_notification_urls %}
                        <div class="inline-warning">
                            <img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Look out!') }}" title="{{ _('Lookout!') }}" >
-                            {{ _('There are') }} <a href="{{ url_for('settings.settings_page')}}#notifications">{{ _('system-wide notification URLs enabled') }}</a>, {{ _('this form will override notification settings for this watch only') }} &dash; {{ _('an empty Notification URL list here will still send notifications.') }}
+                            {{ _('There are <a href="%(url)s">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only &dash; an empty Notification URL list here will still send notifications.', url=url_for('settings.settings_page') ~ '#notifications')|safe }}
                        </div>
                        {% endif %}
                        <a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">{{ _('Use system defaults') }}</a>
@@ -321,7 +323,13 @@ Math: {{ 1 + 1 }}") }}
                    </div>
                </div>
            </div>
+            {% if not llm_features_disabled %}
+            <div class="tab-pane-inner" id="ai-llm">
+                {% include "edit/include_llm_intent.html" %}
+            </div>
+            {% endif %}
            <div class="tab-pane-inner" id="filters-and-triggers">
+
                <span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
              <div>
              <div id="edit-text-filter">
@@ -347,7 +355,7 @@ Math: {{ 1 + 1 }}") }}
                        {{ render_checkbox_field(form.filter_text_added) }}
                        {{ render_checkbox_field(form.filter_text_replaced) }}
                        {{ render_checkbox_field(form.filter_text_removed) }}
-                    <span class="pure-form-message-inline">{{ _('Note: Depending on the length and similarity of the text on each line, the algorithm may consider an') }} <strong>{{ _('addition') }}</strong> {{ _('instead of') }} <strong>{{ _('replacement') }}</strong> {{ _('for example.') }}</span><br>
+                    <span class="pure-form-message-inline">{{ _('Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.')|safe }}</span><br>
                    <span class="pure-form-message-inline">&nbsp;{{ _('So it\'s always better to select') }} <strong>{{ _('Added') }}</strong>+<strong>{{ _('Replaced') }}</strong> {{ _('when you\'re interested in new content.') }}</span><br>
                    <span class="pure-form-message-inline">&nbsp;{{ _('When content is merely moved in a list, it will also trigger an') }} <strong>{{ _('addition') }}</strong>, {{ _('consider enabling') }} <code><strong>{{ _('Only trigger when unique lines appear') }}</strong></code></span>
                </fieldset>
@@ -361,7 +369,8 @@ Math: {{ 1 + 1 }}") }}
                </fieldset>
                <fieldset class="pure-control-group">
                    {{ render_checkbox_field(form.sort_text_alphabetically) }}
-                    <span class="pure-form-message-inline">{{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with') }} <i>{{ _('check unique lines') }}</i> {{ _('below.') }}</span>
+                    {# TRANSLATORS: CJK fonts lack native italics; allow substitution with conventional local styling. dennis-ignore: W303 #}
+                    <span class="pure-form-message-inline">{{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.')|safe }}</span>
                </fieldset>
                <fieldset class="pure-control-group">
                    {{ render_checkbox_field(form.trim_text_whitespace) }}
@@ -375,7 +384,20 @@ Math: {{ 1 + 1 }}") }}
                    const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
                </script>
                <br>
-                {#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
+                {% if llm_configured %}
+                <div id="llm-preview-result" style="display:none; margin-bottom: 0.8em; padding: 0.8em 1.1em; border-radius: 4px; border-left: 4px solid #ccc; font-size: 0.9em;">
+                    <div style="font-size:0.75em; text-transform:uppercase; letter-spacing:0.06em; opacity:0.55; margin-bottom:0.35em;">{{ _('AI Intent preview') }}</div>
+                    <span class="llm-preview-verdict" style="font-weight: bold;"></span>
+                    <div class="llm-preview-answer" style="margin-top: 0.5em; white-space: pre-wrap; line-height: 1.5; font-style: italic;"></div>
+                </div>
+                <style>
+                    #llm-preview-result { transition: border-color 0.2s, background 0.2s; }
+                    #llm-preview-result[data-found="1"] { border-color: #2ecc71; background: rgba(46,204,113,0.07); }
+                    #llm-preview-result[data-found="1"] .llm-preview-verdict { color: #27ae60; }
+                    #llm-preview-result[data-found="0"] { border-color: #aaa; background: rgba(0,0,0,0.03); }
+                    #llm-preview-result[data-found="0"] .llm-preview-verdict { color: #888; }
+                </style>
+                {% endif %}
                <div class="minitabs-wrapper">
                  <div class="minitabs-content">
                      <div id="text-preview-inner" class="monospace-preview">
@@ -485,6 +507,16 @@ Math: {{ 1 + 1 }}") }}
                            <td>{{ _('Server type reply') }}</td>
                            <td>{{ watch.get('remote_server_reply') }}</td>
                        </tr>
+                        {% if not llm_features_disabled and settings_application.get('llm', {}).get('model') %}
+                        <tr>
+                            <td>{{ _('AI tokens (last check)') }}</td>
+                            <td>{{ "{:,}".format(watch.get('llm_last_tokens_used') or 0) }}</td>
+                        </tr>
+                        <tr>
+                            <td>{{ _('AI tokens (total)') }}</td>
+                            <td>{{ "{:,}".format(watch.get('llm_tokens_used_cumulative') or 0) }}</td>
+                        </tr>
+                        {% endif %}
                        </tbody>
                    </table>

@@ -26,7 +26,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
        add_paused = request.form.get('edit_and_watch_submit_button') != None
        from changedetectionio import processors
        processor = request.form.get('processor', processors.get_default_processor())
-        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras={'paused': add_paused, 'processor': processor})
+        llm_intent = request.form.get('llm_intent', '').strip()
+        extras = {'paused': add_paused, 'processor': processor}
+        if llm_intent:
+            extras['llm_intent'] = llm_intent
+        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras=extras)

        if new_uuid:
            if add_paused:
@@ -82,6 +82,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
        sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])

        proxy_list = datastore.proxy_list
+
+        from changedetectionio.llm.evaluator import get_llm_config as _get_llm_config
+        from changedetectionio.llm.ui_strings import LLM_INTENT_WATCH_PLACEHOLDER
+        llm_configured = bool(_get_llm_config(datastore))
+
        output = render_template(
            "watch-overview.html",
            active_tag=active_tag,
@@ -89,9 +94,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
            datastore=datastore,
            errored_count=errored_count,
-            extra_classes='has-queue' if not update_q.empty() else '',
+            extra_classes=' '.join(filter(None, ['has-queue' if not update_q.empty() else '', 'llm-configured' if llm_configured else ''])),
            form=form,
            generate_tag_colors=processors.generate_processor_badge_colors,
+            wcag_text_color=processors.wcag_text_color,
            guid=datastore.data['app_guid'],
            has_proxies=proxy_list,
            hosted_sticky=os.getenv("SALTED_PASS", False) == False,
@@ -105,10 +111,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            search_q=request.args.get('q', '').strip(),
            sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
            sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
-            system_default_fetcher=datastore.data['settings']['application'].get('browser_profile'),
+            system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
            tags=sorted_tags,
            unread_changes_count=datastore.unread_changes_count,
-            watches=sorted_watches
+            watches=sorted_watches,
+            llm_configured=llm_configured,
+            llm_intent_watch_placeholder=LLM_INTENT_WATCH_PLACEHOLDER,
        )

        # Return freed template-building memory to the OS immediately.
@@ -71,6 +71,13 @@ document.addEventListener('DOMContentLoaded', function() {
 {%- for uuid, tag in tags -%}
 {%- if tag and tag.title -%}
 {%- set class_name = tag.title|sanitize_tag_class -%}
+{%- if tag.get('tag_colour') -%}
+.button-tag.tag-{{ class_name }},
+.watch-tag-list.tag-{{ class_name }} {
+  background-color: {{ tag.tag_colour }};
+  color: {{ wcag_text_color(tag.tag_colour) }};
+}
+{%- else -%}
 {%- set colors = generate_tag_colors(tag.title) -%}
 .button-tag.tag-{{ class_name }} {
  background-color: {{ colors['light']['bg'] }};
@@ -92,6 +99,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
  color: {{ colors['dark']['color'] }};
 }
 {%- endif -%}
+{%- endif -%}
 {%- endfor -%}
 </style>
 <div class="box" id="form-quick-watch-add">
@@ -105,6 +113,16 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
                    {{ render_nolabel_field(form.watch_submit_button, title=_("Watch this URL!") ) }}
                    {{ render_nolabel_field(form.edit_and_watch_submit_button, title=_("Edit first then Watch") ) }}
            </div>
+            {% if llm_configured %}
+            <div id="quick-watch-llm-intent" style="display:none; margin-top: 0.5em;">
+                <textarea name="llm_intent"
+                          id="quick_watch_llm_intent"
+                          rows="2"
+                          class="pure-input-1"
+                          placeholder="{{ _('AI — Notify when…') }} {{ llm_intent_watch_placeholder }}"
+                ></textarea>
+            </div>
+            {% endif %}
            <div id="watch-group-tag">
               {{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder=_("Watch group / tag"), class="transparent-field") }}
            </div>
@@ -118,6 +136,14 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
        </span>
    </form>
 </div>
+{% if llm_configured %}
+<script>
+window.watchOverviewI18n = {
+    generatingSummary: {{ _('Generating summary…')|tojson }},
+    gotoHistory:       {{ _('Goto full history')|tojson }}
+};
+</script>
+{% endif %}
 <div class="box">
    <form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
    <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
@@ -137,12 +163,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
                       data-confirm-type="danger"
                       data-confirm-title="{{ _('Clear Histories') }}"
                       data-confirm-message="{{ _('<p>Are you sure you want to clear history for the selected items?</p><p>This action cannot be undone.</p>') }}"
+                       {# TRANSLATORS: Universally recognized; typically left as-is. dennis-ignore: W302 #}
                       data-confirm-button="{{ _('OK') }}"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Clear/reset history') }}</button>
        <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"
                       data-requires-confirm
                       data-confirm-type="danger"
                       data-confirm-title="{{ _('Delete Watches?') }}"
-                       data-confirm-message="{{ _('<p>Are you sure you want to delete the selected watches?</strong></p><p>This action cannot be undone.</p>') }}"
+                       data-confirm-message="{{ _('<p><strong>Are you sure you want to delete the selected watches?</strong></p><p>This action cannot be undone.</p>') }}"
                       data-confirm-button="{{ _('Delete') }}"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Delete') }}</button>
    </div>

@@ -197,8 +224,8 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
             {%- if any_has_restock_price_processor -%}
                <th>{{ _('Restock & Price') }}</th>
             {%- endif -%}
-                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">{{ _('Last') }}</span> {{ _('Checked') }} <span class='arrow {{link_order}}'></span></a></th>
-                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">{{ _('Last') }}</span> {{ _('Changed') }} <span class='arrow {{link_order}}'></span></a></th>
+                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">{{ _('Last Checked') }}</span><span class="hide-on-desktop">{{ _('Checked') }}</span> <span class='arrow {{link_order}}'></span></a></th>
+                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">{{ _('Last Changed') }}</span><span class="hide-on-desktop">{{ _('Changed') }}</span> <span class='arrow {{link_order}}'></span></a></th>
                <th class="empty-cell"></th>
            </tr>
            </thead>
@@ -237,10 +264,10 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
                <td class="inline checkbox-uuid" ><div><input name="uuids"  type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
                <td class="inline watch-controls">
                    <div>
-                    <a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
-                    <a class="ajax-op state-on pause-toggle"  data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
-                    <a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a>
-                    <a class="ajax-op state-on mute-toggle" data-op="mute"  style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a>
+                    <a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="{{ _('Pause checks') }}" title="{{ _('Pause checks') }}" class="icon icon-pause" ></a>
+                    <a class="ajax-op state-on pause-toggle"  data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="{{ _('UnPause checks') }}" title="{{ _('UnPause checks') }}" class="icon icon-unpause" ></a>
+                    <a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('Mute notification') }}" title="{{ _('Mute notification') }}" class="icon icon-mute" ></a>
+                    <a class="ajax-op state-on mute-toggle" data-op="mute"  style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('UnMute notification') }}" title="{{ _('UnMute notification') }}" class="icon icon-mute" ></a>
                    </div>
                </td>

@@ -284,8 +311,11 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
                            {%- endfor -%}
                        </div>
                    <div class="status-icons">
-                            <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
-                            {{ watch.effective_browser_profile.get_fetcher_class_name()|fetcher_status_icons }}
+                            <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="{{ _('Create a link to share watch config with others') }}" ></a>
+                            {%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
+                            {%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
+                                {{ effective_fetcher|fetcher_status_icons }}
+                            {%- endif -%}
                            {%- if watch.is_pdf  -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
                            {%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}

@@ -324,13 +354,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
                </td>
 {%- endif -%}
            {#last_checked becomes fetch-start-time#}
-                <td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
+                <td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" data-label="{{ _('Last Checked') }}">
                    <div class="spinner-wrapper" style="display:none;" >
-                        <span class="spinner"></span><span class="status-text">&nbsp;{{ _('Checking now') }}</span>
+                        <span class="spinner"></span><span class="status-text">&nbsp;{{ watch['__check_status'] or _('Checking now') }}</span>
                    </div>
                    <span class="innertext">{{watch|format_last_checked_time|safe}}</span>
                </td>
-                <td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
+                <td class="last-changed" data-timestamp="{{ watch.last_changed }}" data-label="{{ _('Last Changed') }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
                    {{watch.last_changed|format_timestamp_timeago}}
                    {%- else -%}
                    {{ _('Not yet') }}
@@ -342,7 +372,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
                    <a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">{{ _('Queued') }}</a>
                    <a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">{{ _('Recheck') }}</a>
                    <a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">{{ _('Edit') }}</a>
-                    <a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">{{ _('History') }}</a>
+                    <a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link ai-history-btn" style="display: none;" data-uuid="{{ watch.uuid }}" data-summary-url="{{ url_for('ui.ui_diff.diff_llm_summary', uuid=watch.uuid) }}"><span class="btn-label-history">{{ _('History') }}</span><span class="btn-label-summary">&#x2728; {{ _('Summary') }}</span></a>
                    <a href="{{ url_for('ui.ui_preview.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">{{ _('Preview') }}</a>
                    </div>
                </td>
@@ -1,4 +1,5 @@
 from json_logic.builtins import BUILTINS
+from flask_babel import lazy_gettext as _l

 from .exceptions import EmptyConditionRuleRowNotUsable
 from .pluggy_interface import plugin_manager  # Import the pluggy plugin manager
@@ -6,19 +7,19 @@ from . import default_plugin
 from loguru import logger
 # List of all supported JSON Logic operators
 operator_choices = [
-    (None, "Choose one - Operator"),
-    (">", "Greater Than"),
-    ("<", "Less Than"),
-    (">=", "Greater Than or Equal To"),
-    ("<=", "Less Than or Equal To"),
-    ("==", "Equals"),
-    ("!=", "Not Equals"),
-    ("in", "Contains"),
+    (None, _l("Choose one - Operator")),
+    (">", _l("Greater Than")),
+    ("<", _l("Less Than")),
+    (">=", _l("Greater Than or Equal To")),
+    ("<=", _l("Less Than or Equal To")),
+    ("==", _l("Equals")),
+    ("!=", _l("Not Equals")),
+    ("in", _l("Contains")),
 ]

 # Fields available in the rules
 field_choices = [
-    (None, "Choose one - Field"),
+    (None, _l("Choose one - Field")),
 ]

 # The data we will feed the JSON Rules to see if it passes the test/conditions or not
@@ -3,6 +3,7 @@ import re
 import pluggy
 from price_parser import Price
 from loguru import logger
+from flask_babel import lazy_gettext as _l

 hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")

@@ -47,22 +48,22 @@ def register_operators():
@hookimpl
 def register_operator_choices():
    return [
-        ("!in", "Does NOT Contain"),
-        ("starts_with", "Text Starts With"),
-        ("ends_with", "Text Ends With"),
-        ("length_min", "Length minimum"),
-        ("length_max", "Length maximum"),
-        ("contains_regex", "Text Matches Regex"),
-        ("!contains_regex", "Text Does NOT Match Regex"),
+        ("!in", _l("Does NOT Contain")),
+        ("starts_with", _l("Text Starts With")),
+        ("ends_with", _l("Text Ends With")),
+        ("length_min", _l("Length minimum")),
+        ("length_max", _l("Length maximum")),
+        ("contains_regex", _l("Text Matches Regex")),
+        ("!contains_regex", _l("Text Does NOT Match Regex")),
    ]

@hookimpl
 def register_field_choices():
    return [
-        ("extracted_number", "Extracted number after 'Filters & Triggers'"),
+        ("extracted_number", _l("Extracted number after 'Filters & Triggers'")),
 #        ("meta_description", "Meta Description"),
 #        ("meta_keywords", "Meta Keywords"),
-        ("page_filtered_text", "Page text after 'Filters & Triggers'"),
+        ("page_filtered_text", _l("Page text after 'Filters & Triggers'")),
        #("page_title", "Page <title>"), # actual page title <title>
    ]

@@ -1,6 +1,7 @@
 # Condition Rule Form (for each rule row)
 from wtforms import Form, SelectField, StringField, validators
 from wtforms import validators
+from flask_babel import lazy_gettext as _l

 class ConditionFormRow(Form):

@@ -8,18 +9,18 @@ class ConditionFormRow(Form):
    from changedetectionio.conditions import plugin_manager
    from changedetectionio.conditions import operator_choices, field_choices
    field = SelectField(
-        "Field",
+        _l("Field"),
        choices=field_choices,
        validators=[validators.Optional()]
    )

    operator = SelectField(
-        "Operator",
+        _l("Operator"),
        choices=operator_choices,
        validators=[validators.Optional()]
    )

-    value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"})
+    value = StringField(_l("Value"), validators=[validators.Optional()], render_kw={"placeholder": _l("A value")})

    def validate(self, extra_validators=None):
        # First, run the default validators
@@ -30,15 +31,15 @@ class ConditionFormRow(Form):
        # If any of the operator/field/value is set, then they must be all set
        if any(value not in ("", False, "None", None) for value in [self.operator.data, self.field.data, self.value.data]):
            if not self.operator.data or self.operator.data == 'None':
-                self.operator.errors.append("Operator is required.")
+                self.operator.errors.append(_l("Operator is required."))
                return False

            if not self.field.data or self.field.data == 'None':
-                self.field.errors.append("Field is required.")
+                self.field.errors.append(_l("Field is required."))
                return False

            if not self.value.data:
-                self.value.errors.append("Value is required.")
+                self.value.errors.append(_l("Value is required."))
                return False

        return True  # Only return True if all conditions pass
@@ -4,6 +4,7 @@ Provides metrics for measuring text similarity between snapshots.
 """
 import pluggy
 from loguru import logger
+from flask_babel import gettext as _, lazy_gettext as _l

 LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000

@@ -53,8 +54,8 @@ def register_operator_choices():
@conditions_hookimpl
 def register_field_choices():
    return [
-        ("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
-        ("levenshtein_distance", "Levenshtein - Text change distance"),
+        ("levenshtein_ratio", _l("Levenshtein - Text similarity ratio")),
+        ("levenshtein_distance", _l("Levenshtein - Text change distance")),
    ]

@conditions_hookimpl
@@ -77,7 +78,7 @@ def ui_edit_stats_extras(watch):
    """Add Levenshtein stats to the UI using the global plugin system"""
    """Generate the HTML for Levenshtein stats - shared by both plugin systems"""
    if len(watch.history.keys()) < 2:
-        return "<p>Not enough history to calculate Levenshtein metrics</p>"
+        return f"<p>{_('Not enough history to calculate Levenshtein metrics')}</p>"


    # Protection against the algorithm getting stuck on huge documents
@@ -87,37 +88,37 @@ def ui_edit_stats_extras(watch):
            for idx in (-1, -2)
            if len(k) >= abs(idx)
    ):
-        return "<p>Snapshot too large for edit statistics, skipping.</p>"
+        return f"<p>{_('Snapshot too large for edit statistics, skipping.')}</p>"

    try:
        lev_data = levenshtein_ratio_recent_history(watch)
        if not lev_data or not isinstance(lev_data, dict):
-            return "<p>Unable to calculate Levenshtein metrics</p>"
-            
+            return f"<p>{_('Unable to calculate Levenshtein metrics')}</p>"
+
        html = f"""
        <div class="levenshtein-stats">
-            <h4>Levenshtein Text Similarity Details</h4>
+            <h4>{_('Levenshtein Text Similarity Details')}</h4>
            <table class="pure-table">
                <tbody>
                    <tr>
-                        <td>Raw distance (edits needed)</td>
+                        <td>{_('Raw distance (edits needed)')}</td>
                        <td>{lev_data['distance']}</td>
                    </tr>
                    <tr>
-                        <td>Similarity ratio</td>
+                        <td>{_('Similarity ratio')}</td>
                        <td>{lev_data['ratio']:.4f}</td>
                    </tr>
                    <tr>
-                        <td>Percent similar</td>
+                        <td>{_('Percent similar')}</td>
                        <td>{lev_data['percent_similar']}%</td>
                    </tr>
                </tbody>
            </table>
-            <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
+            <p style="font-size: 80%;">{_('Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.')}</p>
        </div>
        """
        return html
    except Exception as e:
        logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
-        return "<p>Error calculating Levenshtein metrics</p>"
+        return f"<p>{_('Error calculating Levenshtein metrics')}</p>"
        
@@ -4,6 +4,7 @@ Provides word count metrics for snapshot content.
 """
 import pluggy
 from loguru import logger
+from flask_babel import gettext as _, lazy_gettext as _l

 # Support both plugin systems
 conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
@@ -40,7 +41,7 @@ def register_operator_choices():
 def register_field_choices():
    # Add a field that will be available in conditions
    return [
-        ("word_count", "Word count of content"),
+        ("word_count", _l("Word count of content")),
    ]

@conditions_hookimpl
@@ -61,16 +62,16 @@ def _generate_stats_html(watch):
    
    html = f"""
    <div class="word-count-stats">
-        <h4>Content Analysis</h4>
+        <h4>{_('Content Analysis')}</h4>
        <table class="pure-table">
            <tbody>
                <tr>
-                    <td>Word count (latest snapshot)</td>
+                    <td>{_('Word count (latest snapshot)')}</td>
                    <td>{word_count}</td>
                </tr>
            </tbody>
        </table>
-        <p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
+        <p style="font-size: 80%;">{_('Word count is a simple measure of content length, calculated by splitting text on whitespace.')}</p>
    </div>
    """
    return html
@@ -1,4 +1,5 @@
 import sys
+from changedetectionio.strtobool import strtobool
 from loguru import logger
 from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
 import os
@@ -24,71 +25,87 @@ SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_
 # Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe
 SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000))

+# available_fetchers() will scan this implementation looking for anything starting with html_
+# this information is used in the form selections
+from changedetectionio.content_fetchers.requests import fetcher as html_requests
+
+
 import importlib.resources
 XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
 INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
 FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8')


-# Registry: clean fetcher name → fetcher class (e.g. 'requests', 'playwright', 'cloakbrowser')
-FETCHERS: dict = {}
-
-
-def register_fetcher(name: str, cls) -> None:
-    """Register a fetcher class under its clean name (no html_ prefix)."""
-    FETCHERS[name] = cls
-
-
-def get_fetcher(name: str):
-    """Return the fetcher class for a clean name, or None."""
-    return FETCHERS.get(name)
-
-
 def available_fetchers():
-    """Return list of (name, description) for all registered fetchers."""
-    return [(name, cls.fetcher_description) for name, cls in FETCHERS.items()
-            if hasattr(cls, 'fetcher_description')]
+    """Return a list of (name, description) tuples for every selectable fetcher.
+
+    Built-in fetchers are found by scanning this module for classes whose name
+    starts with 'html_'. Plugin fetchers are taken from the module-level
+    `_plugin_fetchers` cache and are appended after the built-ins.
+
+    Returns:
+        list[tuple]: (fetcher name, fetcher_description) pairs.
+    """
+    # See the if statement at the bottom of this file for how we switch between playwright and webdriver
+    import inspect
+
+    # @todo html_ is maybe better as fetcher_ or something
+    # In this case, make sure to edit the default one in store.py and fetch_site_status.py
+    #
+    # getmembers() is already filtered with inspect.isclass, so no second isclass() check is needed.
+    # Plugin fetchers are also setattr()'d onto this module, so they are skipped here
+    # and only listed once, from the plugin cache below.
+    fetchers = [
+        (name, obj.fetcher_description)
+        for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass)
+        if name.startswith('html_') and name not in _plugin_fetchers
+    ]
+
+    # Get plugin fetchers from cache (already loaded at module init)
+    for name, fetcher_class in _plugin_fetchers.items():
+        if hasattr(fetcher_class, 'fetcher_description'):
+            fetchers.append((name, fetcher_class.fetcher_description))
+        else:
+            logger.warning(f"Plugin fetcher '{name}' does not have fetcher_description attribute")
+
+    return fetchers


-def available_browser_fetchers():
-    """Return list of (name, description) for fetchers that support screenshots (browser-type fetchers)."""
-    return [(name, cls.fetcher_description) for name, cls in FETCHERS.items()
-            if cls.supports_screenshots]
+def get_plugin_fetchers():
+    """Load and return all plugin fetchers from the centralized plugin manager.
+
+    Calls the `register_content_fetcher` hook on the plugin manager. Each truthy
+    hook result is unpacked as `(name, fetcher_class)`, stored in the returned
+    dict and also set as an attribute on this module under `name`.
+
+    Returns:
+        dict: fetcher name -> fetcher class. If the hook call or the unpacking
+        raises, the error is logged and whatever was collected up to that point
+        (possibly an empty dict) is returned.
+    """
+    from changedetectionio.pluggy_interface import plugin_manager
 
-
-def _load_fetchers():
-    """Load all fetchers (built-ins + plugins) into the FETCHERS registry."""
-    from changedetectionio.pluggy_interface import plugin_manager, register_builtin_fetchers
-
-    # Built-ins must be registered first
-    register_builtin_fetchers()
-
-    # Then external plugins
+    fetchers = {}
     try:
+        # Call the register_content_fetcher hook from all registered plugins
         results = plugin_manager.hook.register_content_fetcher()
         for result in results:
+            # Falsy hook results are skipped
             if result:
                 name, fetcher_class = result
-                register_fetcher(name, fetcher_class)
-                logger.info(f"Registered fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', '?')}")
+                fetchers[name] = fetcher_class
+                # Register in current module so hasattr() checks work
+                setattr(sys.modules[__name__], name, fetcher_class)
+                logger.info(f"Registered plugin fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', 'No description')}")
+    # Best-effort: a failure while loading plugins is logged rather than raised
     except Exception as e:
         logger.error(f"Error loading plugin fetchers: {e}")
 
+    return fetchers


-# Default browser profiles always shown in the browser profiles table (keyed by machine name)
-DEFAULT_BROWSER_PROFILES: dict = {}
+# Initialize plugins at module load time
+_plugin_fetchers = get_plugin_fetchers()


-def _register_default_browser_profiles():
-    """Register browser profiles that are always present in the profiles table."""
-    from changedetectionio.model.browser_profile import BUILTIN_REQUESTS
-    DEFAULT_BROWSER_PROFILES[BUILTIN_REQUESTS.get_machine_name()] = BUILTIN_REQUESTS
+# Decide which is the 'real' HTML webdriver, this is more a system wide config
+# rather than site-specific.
+use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
+if use_playwright_as_chrome_fetcher:
+    # @note - For now, browser steps always uses playwright
+    if not strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
+        logger.debug('Using Playwright library as fetcher')
+        from .playwright import fetcher as html_webdriver
+    else:
+        logger.debug('Using direct Python Puppeteer library as fetcher')
+        from .puppeteer import fetcher as html_webdriver
+
+else:
+    logger.debug("Falling back to selenium as fetcher")
+    from .webdriver_selenium import fetcher as html_webdriver


-# Populate the registry at module load time
-_load_fetchers()
-
-
-_register_default_browser_profiles()
+# Register built-in fetchers as plugins after all imports are complete
+from changedetectionio.pluggy_interface import register_builtin_fetchers
+register_builtin_fetchers()

@@ -70,41 +70,37 @@ class Fetcher():
    supports_screenshots = False        # Can capture page screenshots
    supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector

-    # Icon shown in the watch list when this fetcher is the effective fetcher.
-    # Set to a dict with 'filename', 'alt', 'title' keys (image served from static/images/).
-    # None means no icon is shown (e.g. plain HTTP requests fetcher).
-    status_icon = None
-
    # Screenshot element locking - prevents layout shifts during screenshot capture
    # Only needed for visual comparison (image_ssim_diff processor)
    # Locks element dimensions in the first viewport to prevent headers/ads from resizing
    lock_viewport_elements = False      # Default: disabled for performance

-    # BrowserProfile-derived settings — applied by browser fetchers, ignored by html_requests
-    viewport_width: int = 1280
-    viewport_height: int = 1000
-    block_images: bool = False
-    block_fonts: bool = False
-    profile_user_agent: str = None   # Profile-level UA; lower priority than request_headers User-Agent
-    ignore_https_errors: bool = False
-    locale: str = None
-    service_workers: str = 'allow'
-    extra_delay: int = 0
-
    def __init__(self, **kwargs):
        if kwargs and 'screenshot_format' in kwargs:
            self.screenshot_format = kwargs.get('screenshot_format')

+        # Allow lock_viewport_elements to be set via kwargs
        if kwargs and 'lock_viewport_elements' in kwargs:
            self.lock_viewport_elements = kwargs.get('lock_viewport_elements')

-        # BrowserProfile fields — store whatever was passed, subclasses use them
-        for field in ('viewport_width', 'viewport_height', 'block_images', 'block_fonts',
-                      'profile_user_agent', 'ignore_https_errors', 'locale',
-                      'service_workers', 'extra_delay'):
-            if field in kwargs:
-                setattr(self, field, kwargs[field])

+    @classmethod
+    def get_status_icon_data(cls):
+        """Return data for status icon to display in the watch overview.
+
+        This method can be overridden by subclasses to provide custom status icons.
+        The base implementation defined here provides no icon and always returns None.
+
+        Returns:
+            dict or None: Dictionary with icon data:
+                {
+                    'filename': 'icon-name.svg',  # Icon filename
+                    'alt': 'Alt text',            # Alt attribute
+                    'title': 'Tooltip text',      # Title attribute
+                    'style': 'height: 1em;'       # Optional inline CSS
+                }
+                Or None if no icon
+        """
+        return None

    def clear_content(self):
        """
@@ -202,16 +198,6 @@ class Fetcher():
                    # Stop processing here
                    raise BrowserStepsStepException(step_n=step_n, original_e=e)

-    def disk_cleanup_after_fetch(self):
-        """Remove any temporary files written to disk during a fetch.
-
-        The default implementation is a no-op.  Browser-based fetchers
-        override this to delete browser-step screenshots and any other
-        ephemeral files they create.  Called by the processor after
-        ``quit()`` regardless of whether the fetch succeeded or failed.
-        """
-        pass
-
    # It's always good to reset these
    def delete_browser_steps_screenshots(self):
        import glob
@@ -0,0 +1,474 @@
+import asyncio
+import gc
+import json
+import os
+from urllib.parse import urlparse
+
+from loguru import logger
+
+from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
+    SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
+from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
+from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable, \
+    BrowserStepsStepException
+
+
+async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
+    import os
+    import time
+
+    start = time.time()
+    watch_info = f"[{watch_uuid}] " if watch_uuid else ""
+
+    setup_start = time.time()
+    page_height = await page.evaluate("document.documentElement.scrollHeight")
+    page_width = await page.evaluate("document.documentElement.scrollWidth")
+    original_viewport = page.viewport_size
+    dimensions_time = time.time() - setup_start
+
+    logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
+
+    # Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
+    step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
+    screenshot_chunks = []
+    y = 0
+    elements_locked = False
+
+    # Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
+    # This prevents headers/ads from resizing when viewport changes
+    if lock_viewport_elements and page_height > page.viewport_size['height']:
+        lock_start = time.time()
+        lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
+        with open(lock_elements_js_path, 'r') as f:
+            lock_elements_js = f.read()
+        await page.evaluate(lock_elements_js)
+        elements_locked = True
+        lock_time = time.time() - lock_start
+        logger.debug(f"{watch_info}Viewport element locking enabled (took {lock_time:.2f}s)")
+
+    if page_height > page.viewport_size['height']:
+        if page_height < step_size:
+            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
+        # Never set viewport taller than our max capture height - otherwise one screenshot chunk
+        # captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
+        step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
+        viewport_start = time.time()
+        logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
+        # Set viewport to a larger size to capture more content at once
+        await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
+        viewport_time = time.time() - viewport_start
+        logger.debug(f"{watch_info}Viewport changed to {page.viewport_size['width']}x{step_size} (took {viewport_time:.2f}s)")
+
+    # Capture screenshots in chunks up to the max total height
+    capture_start = time.time()
+    chunk_times = []
+    # Use PNG for better quality (no compression artifacts), JPEG for smaller size
+    screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
+    # PNG should use quality 100, JPEG uses configurable quality
+    screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
+
+    while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
+        # Only scroll if not at the top (y > 0)
+        if y > 0:
+            await page.evaluate(f"window.scrollTo(0, {y})")
+
+        # Request GC only before screenshot (not 3x per chunk)
+        await page.request_gc()
+
+        screenshot_kwargs = {
+            'type': screenshot_type,
+            'full_page': False
+        }
+        # Only pass quality parameter for jpeg (PNG doesn't support it in Playwright)
+        if screenshot_type == 'jpeg':
+            screenshot_kwargs['quality'] = screenshot_quality
+
+        chunk_start = time.time()
+        screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
+        chunk_time = time.time() - chunk_start
+        chunk_times.append(chunk_time)
+        logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
+        y += step_size
+
+    # Restore original viewport size
+    await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
+
+    # Unlock element dimensions if they were locked
+    if elements_locked:
+        unlock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'unlock-elements-sizing.js')
+        with open(unlock_elements_js_path, 'r') as f:
+            unlock_elements_js = f.read()
+        await page.evaluate(unlock_elements_js)
+        logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
+
+    capture_time = time.time() - capture_start
+    total_capture_time = sum(chunk_times)
+    logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
+
+    # If we have multiple chunks, stitch them together
+    if len(screenshot_chunks) > 1:
+        stitch_start = time.time()
+        logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
+
+        # Always use spawn subprocess for ANY stitching (2+ chunks)
+        # PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
+        # Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
+        from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
+        import multiprocessing
+        import struct
+
+        ctx = multiprocessing.get_context('spawn')
+        parent_conn, child_conn = ctx.Pipe()
+        p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
+        p.start()
+
+        # Send via raw bytes (no pickle)
+        parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
+        for chunk in screenshot_chunks:
+            parent_conn.send_bytes(chunk)
+
+        screenshot = parent_conn.recv_bytes()
+        p.join()
+
+        parent_conn.close()
+        child_conn.close()
+        del p, parent_conn, child_conn
+
+        stitch_time = time.time() - stitch_start
+        total_time = time.time() - start
+        setup_time = total_time - capture_time - stitch_time
+        logger.debug(
+            f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
+            f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
+        return screenshot
+
+    total_time = time.time() - start
+    setup_time = total_time - capture_time
+    logger.debug(
+        f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
+        f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
+
+    return screenshot_chunks[0]
+
+class fetcher(Fetcher):
+    fetcher_description = "Playwright {}/Javascript".format(
+        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
+    )
+    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
+        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
+
+    browser_type = ''
+    command_executor = ''
+
+    # Configs for Proxy setup
+    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
+    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
+
+    proxy = None
+
+    # Capability flags
+    supports_browser_steps = True
+    supports_screenshots = True
+    supports_xpath_element_data = True
+
+    @classmethod
+    def get_status_icon_data(cls):
+        """Return Chrome browser icon data for Playwright fetcher."""
+        return {
+            'filename': 'google-chrome-icon.png',
+            'alt': 'Using a Chrome browser',
+            'title': 'Using a Chrome browser'
+        }
+
+    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
+        super().__init__(**kwargs)
+
+        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
+
+        if custom_browser_connection_url:
+            self.browser_connection_is_custom = True
+            self.browser_connection_url = custom_browser_connection_url
+        else:
+            # Fallback to fetching from system
+            # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
+            self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"')
+
+        # If any proxy settings are enabled, then we should setup the proxy object
+        proxy_args = {}
+        for k in self.playwright_proxy_settings_mappings:
+            v = os.getenv('playwright_proxy_' + k, False)
+            if v:
+                proxy_args[k] = v.strip('"')
+
+        if proxy_args:
+            self.proxy = proxy_args
+
+        # allow per-watch proxy selection override
+        if proxy_override:
+            self.proxy = {'server': proxy_override}
+
+        if self.proxy:
+            # Playwright needs separate username and password values
+            parsed = urlparse(self.proxy.get('server'))
+            if parsed.username:
+                self.proxy['username'] = parsed.username
+                self.proxy['password'] = parsed.password
+
+    async def screenshot_step(self, step_n=''):
+        super().screenshot_step(step_n=step_n)
+        watch_uuid = getattr(self, 'watch_uuid', None)
+        screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
+
+        # Request GC immediately after screenshot to free memory
+        # Screenshots can be large and browser steps take many of them
+        await self.page.request_gc()
+
+        if self.browser_steps_screenshot_path is not None:
+            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
+            logger.debug(f"Saving step screenshot to {destination}")
+            with open(destination, 'wb') as f:
+                f.write(screenshot)
+            # Clear local reference to allow screenshot bytes to be collected
+            del screenshot
+            gc.collect()
+
+    async def save_step_html(self, step_n):
+        super().save_step_html(step_n=step_n)
+        content = await self.page.content()
+
+        # Request GC after getting page content
+        await self.page.request_gc()
+
+        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
+        logger.debug(f"Saving step HTML to {destination}")
+        with open(destination, 'w', encoding='utf-8') as f:
+            f.write(content)
+        # Clear local reference
+        del content
+        gc.collect()
+
+    async def run(self,
+                  fetch_favicon=True,
+                  current_include_filters=None,
+                  empty_pages_are_a_change=False,
+                  ignore_status_codes=False,
+                  is_binary=False,
+                  request_body=None,
+                  request_headers=None,
+                  request_method=None,
+                  screenshot_format=None,
+                  timeout=None,
+                  url=None,
+                  watch_uuid=None,
+                  ):
+
+        from playwright.async_api import async_playwright
+        import playwright._impl._errors
+        import time
+        self.delete_browser_steps_screenshots()
+        self.watch_uuid = watch_uuid  # Store for use in screenshot_step
+        response = None
+
+        async with async_playwright() as p:
+            browser_type = getattr(p, self.browser_type)
+
+            # Seemed to cause a connection Exception even tho I can see it connect
+            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
+            # 60,000 connection timeout only
+            browser = await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
+
+            # SOCKS5 with authentication is not supported (yet)
+            # https://github.com/microsoft/playwright/issues/10567
+
+            # Set user agent to prevent Cloudflare from blocking the browser
+            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
+            context = await browser.new_context(
+                accept_downloads=False,  # Should never be needed
+                bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others
+                extra_http_headers=request_headers,
+                ignore_https_errors=True,
+                proxy=self.proxy,
+                service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'), # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
+                user_agent=manage_user_agent(headers=request_headers),
+            )
+
+            self.page = await context.new_page()
+
+            # Listen for all console events and handle errors
+            self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
+
+            # Re-use as much code from browser steps as possible so its the same
+            from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
+            browsersteps_interface = steppable_browser_interface(start_url=url)
+            browsersteps_interface.page = self.page
+
+            response = await browsersteps_interface.action_goto_url(value=url)
+
+            if response is None:
+                await context.close()
+                await browser.close()
+                logger.debug("Content Fetcher > Response object from the browser communication was none")
+                raise EmptyReply(url=url, status_code=None)
+
+            # In async_playwright, all_headers() returns a coroutine
+            try:
+                self.headers = await response.all_headers()
+            except TypeError:
+                # Fallback for sync version
+                self.headers = response.all_headers()
+
+            try:
+                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
+                    await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
+            except playwright._impl._errors.TimeoutError as e:
+                await context.close()
+                await browser.close()
+                # This can be ok, we will try to grab what we could retrieve
+                pass
+            except Exception as e:
+                logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}")
+                await context.close()
+                await browser.close()
+                raise PageUnloadable(url=url, status_code=None, message=str(e))
+
+            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
+            await self.page.wait_for_timeout(extra_wait * 1000)
+
+            try:
+                self.status_code = response.status
+            except Exception as e:
+                # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
+                logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
+                logger.critical(response)
+                await context.close()
+                await browser.close()
+                raise PageUnloadable(url=url, status_code=None, message=str(e))
+
+            if fetch_favicon:
+                try:
+                    self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
+                    await self.page.request_gc()
+                except Exception as e:
+                    logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
+
+            if self.status_code != 200 and not ignore_status_codes:
+                screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
+                # Finally block will handle cleanup
+                raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
+
+            if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
+                logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
+                await context.close()
+                await browser.close()
+                raise EmptyReply(url=url, status_code=response.status)
+
+            # Wrap remaining operations in try/finally to ensure cleanup
+            try:
+                # Run Browser Steps here
+                if self.browser_steps:
+                    try:
+                        await self.iterate_browser_steps(start_url=url)
+                    except BrowserStepsStepException:
+                        # Finally block will handle cleanup
+                        raise
+
+                    await self.page.wait_for_timeout(extra_wait * 1000)
+
+                now = time.time()
+                # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
+                if current_include_filters is not None:
+                    await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
+                else:
+                    await self.page.evaluate("var include_filters=''")
+                await self.page.request_gc()
+
+                # request_gc before and after evaluate to free up memory
+                # @todo browsersteps etc
+                MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+                self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
+                    "visualselector_xpath_selectors": visualselector_xpath_selectors,
+                    "max_height": MAX_TOTAL_HEIGHT
+                })
+                await self.page.request_gc()
+
+                self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
+                await self.page.request_gc()
+
+                self.content = await self.page.content()
+                await self.page.request_gc()
+                logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
+
+
+                # Bug 3 in Playwright screenshot handling
+                # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
+                # JPEG is better here because the screenshots can be very very large
+
+                # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
+                # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
+                # acceptable screenshot quality here
+                # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
+                self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
+
+                # Force aggressive memory cleanup - screenshots are large and base64 decode creates temporary buffers
+                await self.page.request_gc()
+                gc.collect()
+
+            except ScreenshotUnavailable:
+                # Re-raise screenshot unavailable exceptions
+                raise ScreenshotUnavailable(url=url, status_code=self.status_code)
+
+            finally:
+                # Clean up resources properly with timeouts to prevent hanging
+                try:
+                    if hasattr(self, 'page') and self.page:
+                        await self.page.request_gc()
+                        await asyncio.wait_for(self.page.close(), timeout=5.0)
+                        logger.debug(f"Successfully closed page for {url}")
+                except asyncio.TimeoutError:
+                    logger.warning(f"Timed out closing page for {url} (5s)")
+                except Exception as e:
+                    logger.warning(f"Error closing page for {url}: {e}")
+                finally:
+                    self.page = None
+
+                try:
+                    if context:
+                        await asyncio.wait_for(context.close(), timeout=5.0)
+                        logger.debug(f"Successfully closed context for {url}")
+                except asyncio.TimeoutError:
+                    logger.warning(f"Timed out closing context for {url} (5s)")
+                except Exception as e:
+                    logger.warning(f"Error closing context for {url}: {e}")
+                finally:
+                    context = None
+
+                try:
+                    if browser:
+                        await asyncio.wait_for(browser.close(), timeout=5.0)
+                        logger.debug(f"Successfully closed browser connection for {url}")
+                except asyncio.TimeoutError:
+                    logger.warning(f"Timed out closing browser connection for {url} (5s)")
+                except Exception as e:
+                    logger.warning(f"Error closing browser for {url}: {e}")
+                finally:
+                    browser = None
+
+                # Force Python GC to release Playwright resources immediately
+                # Playwright objects can have circular references that delay cleanup
+                gc.collect()
+
+
+# Plugin registration for built-in fetcher
+class PlaywrightFetcherPlugin:
+    """Plugin class that registers the Playwright fetcher as a built-in plugin."""
+
+    def register_content_fetcher(self):
+        """Register the Playwright fetcher"""
+        return ('html_webdriver', fetcher)
+
+
+# Create module-level instance for plugin registration
+playwright_plugin = PlaywrightFetcherPlugin()
+
+
+
@@ -1,41 +0,0 @@
-"""
-Playwright CDP fetcher — connects to a remote browser via Chrome DevTools Protocol.
-
-browser_connection_url must be supplied via the resolved BrowserProfile
-(set by preconfigure_browser_profiles_based_on_env at startup or edited in the UI).
-"""
-from loguru import logger
-from changedetectionio.pluggy_interface import hookimpl
-from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
-
-
-class fetcher(PlaywrightBaseFetcher):
-    fetcher_description = "Playwright Chrome (CDP/Remote)"
-    requires_connection_url = True
-
-    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
-        super().__init__(proxy_override=proxy_override, custom_browser_connection_url=custom_browser_connection_url, **kwargs)
-
-        if custom_browser_connection_url:
-            self.browser_connection_is_custom = True
-            self.browser_connection_url = custom_browser_connection_url
-        else:
-            logger.critical("Playwright CDP fetcher has no browser_connection_url — browser profile was not configured. "
-                            "Set PLAYWRIGHT_DRIVER_URL or configure a browser profile in Settings.")
-            self.browser_connection_url = None
-
-        # CDP always connects to Chromium
-        self.browser_type = 'chromium'
-
-    async def _connect_browser(self, p):
-        browser_type = getattr(p, self.browser_type)
-        return await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60_000)
-
-
-class PlaywrightCDPPlugin:
-    @hookimpl
-    def register_content_fetcher(self):
-        return ('playwright_cdp', fetcher)
-
-
-cdp_plugin = PlaywrightCDPPlugin()
@@ -1,403 +0,0 @@
-"""
-Playwright-based content fetchers.
-
-Submodules:
-  cdp     — connect to a remote browser via Chrome DevTools Protocol (CDP/WebSocket)
-  chrome  — launch a local Chromium browser
-  firefox — launch a local Firefox browser
-  webkit  — launch a local WebKit (Safari-engine) browser
-"""
-
-import asyncio
-import gc
-import json
-import os
-import re
-from urllib.parse import urlparse
-
-from loguru import logger
-
-from changedetectionio.content_fetchers import (
-    SCREENSHOT_MAX_HEIGHT_DEFAULT,
-    SCREENSHOT_MAX_TOTAL_HEIGHT,
-    SCREENSHOT_SIZE_STITCH_THRESHOLD,
-    FAVICON_FETCHER_JS,
-    INSTOCK_DATA_JS,
-    XPATH_ELEMENT_JS,
-    visualselector_xpath_selectors,
-)
-from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
-from changedetectionio.content_fetchers.exceptions import (
-    BrowserStepsStepException,
-    EmptyReply,
-    Non200ErrorCodeReceived,
-    PageUnloadable,
-    ScreenshotUnavailable,
-)
-
-
-async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
-    import time
-
-    start = time.time()
-    watch_info = f"[{watch_uuid}] " if watch_uuid else ""
-
-    setup_start = time.time()
-    page_height = await page.evaluate("document.documentElement.scrollHeight")
-    page_width = await page.evaluate("document.documentElement.scrollWidth")
-    original_viewport = page.viewport_size
-    dimensions_time = time.time() - setup_start
-
-    logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
-
-    step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD
-    screenshot_chunks = []
-    y = 0
-    elements_locked = False
-
-    if lock_viewport_elements and page_height > page.viewport_size['height']:
-        lock_start = time.time()
-        lock_elements_js_path = os.path.join(os.path.dirname(__file__), '..', 'res', 'lock-elements-sizing.js')
-        with open(lock_elements_js_path, 'r') as f:
-            lock_elements_js = f.read()
-        await page.evaluate(lock_elements_js)
-        elements_locked = True
-        logger.debug(f"{watch_info}Viewport element locking enabled (took {time.time() - lock_start:.2f}s)")
-
-    if page_height > page.viewport_size['height']:
-        if page_height < step_size:
-            step_size = page_height
-        await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
-
-    capture_start = time.time()
-    chunk_times = []
-    screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
-    screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
-
-    while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
-        if y > 0:
-            await page.evaluate(f"window.scrollTo(0, {y})")
-
-        await _safe_request_gc(page)
-
-        screenshot_kwargs = {'type': screenshot_type, 'full_page': False}
-        if screenshot_type == 'jpeg':
-            screenshot_kwargs['quality'] = screenshot_quality
-
-        chunk_start = time.time()
-        screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
-        chunk_time = time.time() - chunk_start
-        chunk_times.append(chunk_time)
-        logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
-        y += step_size
-
-    await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
-
-    if elements_locked:
-        unlock_elements_js_path = os.path.join(os.path.dirname(__file__), '..', 'res', 'unlock-elements-sizing.js')
-        with open(unlock_elements_js_path, 'r') as f:
-            unlock_elements_js = f.read()
-        await page.evaluate(unlock_elements_js)
-
-    capture_time = time.time() - capture_start
-
-    if len(screenshot_chunks) > 1:
-        stitch_start = time.time()
-        from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
-        import multiprocessing
-        import struct
-
-        ctx = multiprocessing.get_context('spawn')
-        parent_conn, child_conn = ctx.Pipe()
-        p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
-        p.start()
-
-        parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
-        for chunk in screenshot_chunks:
-            parent_conn.send_bytes(chunk)
-
-        screenshot = parent_conn.recv_bytes()
-        p.join()
-        parent_conn.close()
-        child_conn.close()
-        del p, parent_conn, child_conn
-
-        stitch_time = time.time() - stitch_start
-        total_time = time.time() - start
-        setup_time = total_time - capture_time - stitch_time
-        logger.debug(
-            f"{watch_info}Screenshot complete - Page height: {page_height}px | "
-            f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
-        return screenshot
-
-    total_time = time.time() - start
-    logger.debug(
-        f"{watch_info}Screenshot complete - Page height: {page_height}px | "
-        f"Setup: {total_time - capture_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
-    return screenshot_chunks[0]
-
-
-async def _safe_request_gc(page):
-    """Request browser GC — Chromium-specific, silently ignored on Firefox/WebKit."""
-    try:
-        await page.request_gc()
-    except Exception:
-        pass
-
-
-class PlaywrightBaseFetcher(Fetcher):
-    """
-    Shared base for all Playwright fetchers.
-
-    Subclasses implement ``_connect_browser(playwright_instance)`` to return a
-    connected-or-launched browser object.  Everything else — context creation,
-    page interaction, screenshot capture, browser-steps execution — lives here.
-    """
-
-    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
-
-    proxy = None
-
-    # Capability flags
-    supports_browser_steps = True
-    supports_screenshots = True
-    supports_xpath_element_data = True
-
-    status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}
-
-    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
-        """Store the optional connection URL and resolve the proxy settings.
-
-        Proxy precedence: values read from the ``playwright_proxy_*`` environment
-        variables are used first; a truthy ``proxy_override`` then replaces them
-        entirely with ``{'server': proxy_override}``.  If the resulting server URL
-        embeds a username, the URL's username and password are copied into the
-        ``username``/``password`` keys of the proxy dict.
-
-        Args:
-            proxy_override: proxy server URL that replaces any environment-derived proxy.
-            custom_browser_connection_url: kept on the instance for subclasses that need it.
-            **kwargs: forwarded unchanged to the parent ``__init__``.
-        """
-        super().__init__(**kwargs)
-
-        # Subclasses may use this (e.g. CDP); others ignore it
-        self._custom_browser_connection_url = custom_browser_connection_url
-
-        # Collect every proxy option that is set (non-empty) in the environment.
-        proxy_args = {}
-        for k in self.playwright_proxy_settings_mappings:
-            v = os.getenv('playwright_proxy_' + k, False)
-            if v:
-                # Drop double quotes that were wrapped around the env value.
-                proxy_args[k] = v.strip('"')
-
-        if proxy_args:
-            self.proxy = proxy_args
-
-        # A per-call override wins over the environment and discards its other keys.
-        if proxy_override:
-            self.proxy = {'server': proxy_override}
-
-        if self.proxy:
-            # Credentials embedded in the server URL (user:pass@host) overwrite any
-            # username/password collected above.
-            parsed = urlparse(self.proxy.get('server', ''))
-            if parsed.username:
-                self.proxy['username'] = parsed.username
-                self.proxy['password'] = parsed.password
-
-    def disk_cleanup_after_fetch(self):
-        """Delete browser-step screenshots written during this fetch.
-
-        Thin wrapper that delegates to ``delete_browser_steps_screenshots()``.
-        """
-        self.delete_browser_steps_screenshots()
-
-    async def _connect_browser(self, playwright_instance):
-        """Return an open browser object.  Must be overridden by each subclass.
-
-        Args:
-            playwright_instance: the object yielded by ``async_playwright()`` in ``run()``.
-
-        Raises:
-            NotImplementedError: always, in this base implementation.
-        """
-        raise NotImplementedError(f"{type(self).__name__} must implement _connect_browser()")
-
-    async def screenshot_step(self, step_n=''):
-        """Capture a full-page screenshot for one browser step and optionally save it.
-
-        The screenshot is written to ``step_<step_n>.jpeg`` inside
-        ``self.browser_steps_screenshot_path`` only when that path is not ``None``.
-
-        Args:
-            step_n: step identifier interpolated into the output filename.
-        """
-        super().screenshot_step(step_n=step_n)
-        # watch_uuid is assigned in run(); fall back to None if it was never set.
-        watch_uuid = getattr(self, 'watch_uuid', None)
-        screenshot = await capture_full_page_async(
-            page=self.page,
-            screenshot_format=self.screenshot_format,
-            watch_uuid=watch_uuid,
-            lock_viewport_elements=self.lock_viewport_elements,
-        )
-        await _safe_request_gc(self.page)
-
-        if self.browser_steps_screenshot_path is not None:
-            # NOTE(review): the suffix is always '.jpeg' regardless of self.screenshot_format — confirm intended.
-            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
-            logger.debug(f"Saving step screenshot to {destination}")
-            with open(destination, 'wb') as f:
-                f.write(screenshot)
-            # Drop the image bytes and collect right away once they are on disk.
-            del screenshot
-            gc.collect()
-
-    async def save_step_html(self, step_n):
-        """Write the current page HTML for one browser step to ``step_<step_n>.html``.
-
-        Args:
-            step_n: step identifier interpolated into the output filename.
-        """
-        super().save_step_html(step_n=step_n)
-        content = await self.page.content()
-        await _safe_request_gc(self.page)
-
-        # NOTE(review): unlike screenshot_step(), there is no check that
-        # browser_steps_screenshot_path is not None; os.path.join(None, ...) raises TypeError.
-        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
-        logger.debug(f"Saving step HTML to {destination}")
-        with open(destination, 'w', encoding='utf-8') as f:
-            f.write(content)
-        # Drop the HTML string and collect right away once it is on disk.
-        del content
-        gc.collect()
-
-    async def run(self,
-                  fetch_favicon=True,
-                  current_include_filters=None,
-                  empty_pages_are_a_change=False,
-                  ignore_status_codes=False,
-                  is_binary=False,
-                  request_body=None,
-                  request_headers=None,
-                  request_method=None,
-                  screenshot_format=None,
-                  timeout=None,
-                  url=None,
-                  watch_uuid=None,
-                  ):
-        """Fetch ``url`` in a browser and populate the result attributes on ``self``.
-
-        On success this sets ``headers``, ``status_code``, ``xpath_data``,
-        ``instock_data``, ``content`` and ``screenshot`` (and ``favicon_blob`` when
-        ``fetch_favicon`` is true).  The page, context and browser are closed in the
-        final ``finally`` block.
-
-        Args:
-            fetch_favicon: evaluate FAVICON_FETCHER_JS and store the result.
-            current_include_filters: JSON-serialised into the page as ``include_filters``.
-            empty_pages_are_a_change: when false, an all-whitespace page raises EmptyReply.
-            ignore_status_codes: when false, any status other than 200 raises
-                Non200ErrorCodeReceived.
-            request_headers: passed to the browser context as extra HTTP headers and
-                to ``manage_user_agent``.
-            url: address to navigate to.
-            watch_uuid: stored on the instance and passed to the screenshot helper.
-            is_binary, request_body, request_method, screenshot_format, timeout:
-                NOTE(review): not referenced anywhere in this method body — confirm
-                whether they are accepted only for interface compatibility.
-
-        Raises:
-            EmptyReply: navigation returned no response, or the page content is empty.
-            PageUnloadable: the custom JS step or reading the response status failed.
-            Non200ErrorCodeReceived: non-200 status while ``ignore_status_codes`` is false.
-            ScreenshotUnavailable: re-raised with this fetch's url and status code.
-        """
-        from playwright.async_api import async_playwright
-        import playwright._impl._errors
-        import time
-
-        # Start from a clean slate: remove step screenshots left by a previous fetch.
-        self.delete_browser_steps_screenshots()
-        self.watch_uuid = watch_uuid
-        response = None
-
-        async with async_playwright() as p:
-            browser = await self._connect_browser(p)
-
-            # First truthy value wins: manage_user_agent() result, then the profile's UA, else None.
-            ua = manage_user_agent(headers=request_headers) or self.profile_user_agent or None
-
-            context_kwargs = dict(
-                accept_downloads=False,
-                bypass_csp=True,
-                extra_http_headers=request_headers,
-                ignore_https_errors=self.ignore_https_errors,
-                proxy=self.proxy,
-                service_workers=self.service_workers,
-                user_agent=ua,
-                viewport={'width': self.viewport_width, 'height': self.viewport_height},
-            )
-            # Only pass a locale when one is configured.
-            if self.locale:
-                context_kwargs['locale'] = self.locale
-
-            context = await browser.new_context(**context_kwargs)
-
-            # Optional request blocking: abort URLs whose path ends in an image/font
-            # extension, optionally followed by a query string.
-            if self.block_images:
-                await context.route(
-                    re.compile(r'\.(png|jpe?g|gif|svg|ico|webp|avif|bmp)(\?.*)?$', re.IGNORECASE),
-                    lambda route: route.abort()
-                )
-            if self.block_fonts:
-                await context.route(
-                    re.compile(r'\.(woff2?|ttf|otf|eot)(\?.*)?$', re.IGNORECASE),
-                    lambda route: route.abort()
-                )
-
-            self.page = await context.new_page()
-            # Mirror the page's console messages into the debug log.
-            self.page.on("console", lambda msg: logger.debug(f"Playwright console: {url} {msg.type}: {msg.text}"))
-
-            from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
-            browsersteps_interface = steppable_browser_interface(start_url=url)
-            browsersteps_interface.page = self.page
-
-            response = await browsersteps_interface.action_goto_url(value=url)
-
-            if response is None:
-                await context.close()
-                await browser.close()
-                raise EmptyReply(url=url, status_code=None)
-
-            # Try the awaited form first; on TypeError fall back to the plain call
-            # (presumably for a non-awaitable return value — TODO confirm).
-            try:
-                self.headers = await response.all_headers()
-            except TypeError:
-                self.headers = response.all_headers()
-
-            # Run the user-supplied JS, if any, before waiting and scraping.
-            try:
-                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
-                    await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
-            except playwright._impl._errors.TimeoutError:
-                # NOTE(review): context and browser are closed here but execution
-                # continues, so the page calls below run against a closed browser — confirm intent.
-                await context.close()
-                await browser.close()
-                pass
-            except Exception as e:
-                await context.close()
-                await browser.close()
-                raise PageUnloadable(url=url, status_code=None, message=str(e))
-
-            # Both delays are multiplied by 1000 for wait_for_timeout(), i.e. they are in seconds.
-            extra_wait = self.extra_delay + self.render_extract_delay
-            await self.page.wait_for_timeout(extra_wait * 1000)
-
-            try:
-                self.status_code = response.status
-            except Exception as e:
-                await context.close()
-                await browser.close()
-                raise PageUnloadable(url=url, status_code=None, message=str(e))
-
-            if fetch_favicon:
-                # Favicon retrieval is best-effort: failures are logged, never raised.
-                try:
-                    self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
-                    await _safe_request_gc(self.page)
-                except Exception as e:
-                    logger.error(f"Error fetching favicon: {e}")
-
-            if self.status_code != 200 and not ignore_status_codes:
-                # Attach a screenshot and (if obtainable) the HTML to the error for diagnosis.
-                # NOTE(review): this path raises without explicitly closing context/browser here.
-                screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
-                try:
-                    page_html = await self.page.content()
-                except Exception as e:
-                    logger.warning(f"Got non-200 status {self.status_code} but failed to fetch page content: {e}")
-                    page_html = None
-                raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot, page_html=page_html)
-
-            if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
-                await context.close()
-                await browser.close()
-                raise EmptyReply(url=url, status_code=response.status)
-
-            try:
-                if self.browser_steps:
-                    # The except clause only re-raises, so step errors propagate unchanged.
-                    try:
-                        await self.iterate_browser_steps(start_url=url)
-                    except BrowserStepsStepException:
-                        raise
-                    # Wait again after the steps have run, using the same delay as above.
-                    await self.page.wait_for_timeout(extra_wait * 1000)
-
-                now = time.time()
-                # Define a page-global 'include_filters' variable before the scrape scripts run.
-                if current_include_filters is not None:
-                    await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
-                else:
-                    await self.page.evaluate("var include_filters=''")
-                await _safe_request_gc(self.page)
-
-                # Height limit comes from the SCREENSHOT_MAX_HEIGHT env var, else the default constant.
-                MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
-                self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
-                    "visualselector_xpath_selectors": visualselector_xpath_selectors,
-                    "max_height": MAX_TOTAL_HEIGHT
-                })
-                await _safe_request_gc(self.page)
-
-                self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
-                await _safe_request_gc(self.page)
-
-                self.content = await self.page.content()
-                await _safe_request_gc(self.page)
-                logger.debug(f"Scrape xPath element data done in {time.time() - now:.2f}s")
-
-                self.screenshot = await capture_full_page_async(
-                    page=self.page,
-                    screenshot_format=self.screenshot_format,
-                    watch_uuid=watch_uuid,
-                    lock_viewport_elements=self.lock_viewport_elements,
-                )
-                await _safe_request_gc(self.page)
-                gc.collect()
-
-            except ScreenshotUnavailable:
-                # Re-raise as a new instance carrying this fetch's url and status code.
-                raise ScreenshotUnavailable(url=url, status_code=self.status_code)
-
-            finally:
-                # Close page, then context, then browser. Each close gets at most
-                # 5 seconds, and a failure or timeout is logged rather than raised so
-                # the remaining closes still run. The first tuple element is unused.
-                for obj, name, close_coro in [
-                    (self.page if hasattr(self, 'page') and self.page else None, 'page', lambda: self.page.close() if self.page else asyncio.sleep(0)),
-                    (context, 'context', lambda: context.close() if context else asyncio.sleep(0)),
-                    (browser, 'browser', lambda: browser.close() if browser else asyncio.sleep(0)),
-                ]:
-                    try:
-                        await asyncio.wait_for(close_coro(), timeout=5.0)
-                    except asyncio.TimeoutError:
-                        logger.warning(f"Timed out closing {name} for {url}")
-                    except Exception as e:
-                        logger.warning(f"Error closing {name} for {url}: {e}")
-
-                # Drop the references and collect so the browser objects can be freed.
-                self.page = None
-                context = None
-                browser = None
-                gc.collect()
@@ -1,27 +0,0 @@
-"""
-Playwright Chrome fetcher — launches a local Chromium browser directly.
-
-No external browser container is required.  Playwright must be installed
-with Chromium browsers: ``playwright install chromium``.
-"""
-from changedetectionio.pluggy_interface import hookimpl
-from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
-
-
-class fetcher(PlaywrightBaseFetcher):
-    """Playwright fetcher that launches a headless Chromium via ``p.chromium.launch``."""
-
-    fetcher_description = "Playwright Chrome (local)"
-
-    async def _connect_browser(self, p):
-        """Launch and return a headless Chromium browser.
-
-        Args:
-            p: Playwright instance exposing ``chromium.launch``.
-        """
-        launch_kwargs = {'headless': True}
-        # Forward the proxy settings only when some are configured.
-        if self.proxy:
-            launch_kwargs['proxy'] = self.proxy
-        return await p.chromium.launch(**launch_kwargs)
-
-
-class PlaywrightChromePlugin:
-    """Plugin class whose hook registers the local Chromium fetcher."""
-
-    @hookimpl
-    def register_content_fetcher(self):
-        """Return the ``(name, fetcher_class)`` pair for this fetcher."""
-        return ('playwright_chrome', fetcher)
-
-
-# Module-level plugin instance.
-chrome_plugin = PlaywrightChromePlugin()
@@ -1,33 +0,0 @@
-"""
-Playwright Firefox fetcher — launches a local Firefox browser directly.
-
-No external browser container is required.  Playwright must be installed
-with Firefox browsers: ``playwright install firefox``.
-
-Note: ``page.request_gc()`` is Chromium-specific and is silently skipped
-on Firefox — this is handled transparently by ``_safe_request_gc()`` in
-the base package.
-"""
-from changedetectionio.pluggy_interface import hookimpl
-from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
-
-
-class fetcher(PlaywrightBaseFetcher):
-    """Playwright fetcher that launches a headless Firefox via ``p.firefox.launch``."""
-
-    fetcher_description = "Playwright Firefox (local)"
-
-    # Firefox-specific icon metadata, replacing the value inherited from the base class.
-    status_icon = {'filename': 'firefox-icon.svg', 'alt': 'Using Firefox', 'title': 'Using Firefox'}
-
-    async def _connect_browser(self, p):
-        """Launch and return a headless Firefox browser.
-
-        Args:
-            p: Playwright instance exposing ``firefox.launch``.
-        """
-        launch_kwargs = {'headless': True}
-        # Forward the proxy settings only when some are configured.
-        if self.proxy:
-            launch_kwargs['proxy'] = self.proxy
-        return await p.firefox.launch(**launch_kwargs)
-
-
-class PlaywrightFirefoxPlugin:
-    """Plugin class whose hook registers the local Firefox fetcher."""
-
-    @hookimpl
-    def register_content_fetcher(self):
-        """Return the ``(name, fetcher_class)`` pair for this fetcher."""
-        return ('playwright_firefox', fetcher)
-
-
-# Module-level plugin instance.
-firefox_plugin = PlaywrightFirefoxPlugin()
@@ -1,30 +0,0 @@
-"""
-Playwright WebKit fetcher — launches a local WebKit (Safari-engine) browser.
-
-No external browser container is required.  Playwright must be installed
-with WebKit browsers: ``playwright install webkit``.
-
-Note: ``page.request_gc()`` is Chromium-specific and is silently skipped
-on WebKit — handled transparently by ``_safe_request_gc()`` in the base package.
-"""
-from changedetectionio.pluggy_interface import hookimpl
-from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
-
-
-class fetcher(PlaywrightBaseFetcher):
-    """Playwright fetcher that launches a headless WebKit via ``p.webkit.launch``."""
-
-    fetcher_description = "Playwright WebKit/Safari (local)"
-
-    async def _connect_browser(self, p):
-        """Launch and return a headless WebKit browser.
-
-        Args:
-            p: Playwright instance exposing ``webkit.launch``.
-        """
-        launch_kwargs = {'headless': True}
-        # Forward the proxy settings only when some are configured.
-        if self.proxy:
-            launch_kwargs['proxy'] = self.proxy
-        return await p.webkit.launch(**launch_kwargs)
-
-
-class PlaywrightWebKitPlugin:
-    """Plugin class whose hook registers the local WebKit fetcher."""
-
-    @hookimpl
-    def register_content_fetcher(self):
-        """Return the ``(name, fetcher_class)`` pair for this fetcher."""
-        return ('playwright_webkit', fetcher)
-
-
-# Module-level plugin instance.
-webkit_plugin = PlaywrightWebKitPlugin()
@@ -7,7 +7,6 @@ from urllib.parse import urlparse

 from loguru import logger

-from changedetectionio.pluggy_interface import hookimpl
 from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
    SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
    SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS
@@ -76,6 +75,9 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
    if page_height > page.viewport['height']:
        if page_height < step_size:
            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
+        # Never set viewport taller than our max capture height - otherwise one screenshot chunk
+        # captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
+        step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
        viewport_start = time.time()
        await page.setViewport({'width': page.viewport['width'], 'height': step_size})
        viewport_time = time.time() - viewport_start
@@ -167,8 +169,11 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc


 class fetcher(Fetcher):
-    fetcher_description = "Puppeteer Chromium"
-    requires_connection_url = True
+    fetcher_description = "Puppeteer/direct {}/Javascript".format(
+        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
+    )
+    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
+        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))

    browser = None
    browser_type = ''
@@ -180,10 +185,14 @@ class fetcher(Fetcher):
    supports_screenshots = True
    supports_xpath_element_data = True

-    status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}
-
-    def disk_cleanup_after_fetch(self):
-        self.delete_browser_steps_screenshots()
+    @classmethod
+    def get_status_icon_data(cls):
+        """Return Chrome browser icon data for Puppeteer fetcher.
+
+        Returns:
+            dict with the icon's ``filename`` and its ``alt`` and ``title`` text.
+            A new dict is built on every call.
+        """
+        return {
+            'filename': 'google-chrome-icon.png',
+            'alt': 'Using a Chrome browser',
+            'title': 'Using a Chrome browser'
+        }

    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
        super().__init__(**kwargs)
@@ -192,10 +201,9 @@ class fetcher(Fetcher):
            self.browser_connection_is_custom = True
            self.browser_connection_url = custom_browser_connection_url
        else:
-            from loguru import logger
-            logger.critical("Puppeteer fetcher has no browser_connection_url — browser profile was not configured. "
-                            "Set PLAYWRIGHT_DRIVER_URL or configure a browser profile in Settings.")
-            self.browser_connection_url = None
+            # Fallback to fetching from system
+            # .strip('"') will save someone a lot of time when they accidentally wrap the env value in double quotes
+            self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"')

        # allow per-watch proxy selection override
        # @todo check global too?
@@ -265,7 +273,7 @@ class fetcher(Fetcher):
        import re
        self.delete_browser_steps_screenshots()

-        n = self.extra_delay + self.render_extract_delay
+        n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 12)) + self.render_extract_delay
        extra_wait = min(n, 15)

        logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
@@ -442,12 +450,8 @@ class fetcher(Fetcher):

        if self.status_code != 200 and not ignore_status_codes:
            screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
-            try:
-                page_html = await self.page.content
-            except Exception as e:
-                logger.warning(f"Got non-200 status {self.status_code} but failed to fetch page content: {e}")
-                page_html = None
-            raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot, page_html=page_html)
+
+            raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

        content = await self.page.content

@@ -547,10 +551,9 @@ class fetcher(Fetcher):
 class PuppeteerFetcherPlugin:
    """Plugin class that registers the Puppeteer fetcher as a built-in plugin."""

-    @hookimpl
    def register_content_fetcher(self):
        """Register the Puppeteer fetcher"""
-        return ('puppeteer', fetcher)
+        return ('html_webdriver', fetcher)


 # Create module-level instance for plugin registration
@@ -1,3 +1,4 @@
+from flask_babel import lazy_gettext as _l
 from loguru import logger
 from urllib.parse import urljoin, urlparse
 import hashlib
@@ -8,13 +9,12 @@ import asyncio
 from changedetectionio import strtobool
 from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
 from changedetectionio.content_fetchers.base import Fetcher
-from changedetectionio.pluggy_interface import hookimpl
-from changedetectionio.validate_url import is_private_hostname
+from changedetectionio.validate_url import is_private_hostname, is_url_private_or_parser_confused


 # "html_requests" is listed as the default fetcher in store.py!
 class fetcher(Fetcher):
-    fetcher_description = "Basic fast Plaintext/HTTP Client"
+    fetcher_description = _l("Basic fast Plaintext/HTTP Client")

    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
        super().__init__(**kwargs)
@@ -87,10 +87,12 @@ class fetcher(Fetcher):

        try:
            # Fresh DNS check at fetch time — catches DNS rebinding regardless of add-time cache.
+            # Validates every hostname both urlparse and urllib3 see, so parser-differential
+            # payloads (GHSA-rph4-96w6-q594) cannot smuggle an internal target past the gate.
            if not allow_iana_restricted:
-                parsed_initial = urlparse(url)
-                if parsed_initial.hostname and is_private_hostname(parsed_initial.hostname):
-                    raise Exception(f"Fetch blocked: '{url}' resolves to a private/reserved IP address. "
+                if is_url_private_or_parser_confused(url):
+                    raise Exception(f"Fetch blocked: '{url}' resolves to a private/reserved IP address "
+                                    f"or contains a parser-differential payload. "
                                    f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow.")

            r = session.request(method=request_method,
@@ -111,9 +113,9 @@ class fetcher(Fetcher):
                location = r.headers.get('Location', '')
                redirect_url = urljoin(current_url, location)
                if not allow_iana_restricted:
-                    parsed_redirect = urlparse(redirect_url)
-                    if parsed_redirect.hostname and is_private_hostname(parsed_redirect.hostname):
-                        raise Exception(f"Redirect blocked: '{redirect_url}' resolves to a private/reserved IP address.")
+                    if is_url_private_or_parser_confused(redirect_url):
+                        raise Exception(f"Redirect blocked: '{redirect_url}' resolves to a private/reserved IP address "
+                                        f"or contains a parser-differential payload.")
                current_url = redirect_url
                r = session.request('GET', redirect_url,
                                    headers=request_headers,
@@ -259,10 +261,9 @@ class fetcher(Fetcher):
 class RequestsFetcherPlugin:
    """Plugin class that registers the requests fetcher as a built-in plugin."""

-    @hookimpl
    def register_content_fetcher(self):
        """Register the requests fetcher"""
-        return ('requests', fetcher)
+        return ('html_requests', fetcher)


 # Create module-level instance for plugin registration
@@ -38,26 +38,39 @@
      if (a.size !== b.size) {
        return b.size - a.size;
      }
-      
+
      // Second priority: apple-touch-icon over regular icon
      const isAppleA = /apple-touch-icon/.test(a.rel);
      const isAppleB = /apple-touch-icon/.test(b.rel);
      if (isAppleA && !isAppleB) return -1;
      if (!isAppleA && isAppleB) return 1;
-      
+
      // Third priority: icons with no size attribute (fallback icons) last
      const hasNoSizeA = !a.hasSizes;
      const hasNoSizeB = !b.hasSizes;
      if (hasNoSizeA && !hasNoSizeB) return 1;
      if (!hasNoSizeA && hasNoSizeB) return -1;
-      
+
      return 0;
    });

    const timeoutMs = 2000;
+    // 1 MB — matches the server-side limit in bump_favicon()
+    const MAX_BYTES = 1 * 1024 * 1024;

    for (const icon of icons) {
      try {
+        // Inline data URI — no network fetch needed, data is already here
+        if (icon.href.startsWith('data:')) {
+          const match = icon.href.match(/^data:([^;]+);base64,([A-Za-z0-9+/=]+)$/);
+          if (!match) continue;
+          const mime_type = match[1];
+          const base64 = match[2];
+          // Rough size check: base64 is ~4/3 the binary size
+          if (base64.length * 0.75 > MAX_BYTES) continue;
+          return { url: icon.href, mime_type, base64 };
+        }
+
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), timeoutMs);

@@ -74,12 +87,15 @@

        const blob = await resp.blob();

+        if (blob.size > MAX_BYTES) continue;
+
        // Convert blob to base64
        const reader = new FileReader();
        return await new Promise(resolve => {
          reader.onloadend = () => {
            resolve({
              url: icon.href,
+              mime_type: blob.type,
              base64: reader.result.split(",")[1]
            });
          };
@@ -98,4 +114,3 @@
  // Auto-execute and return result for page.evaluate()
  return await window.getFaviconAsBlob();
 })();
-
@@ -56,6 +56,10 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
            im.close()
        del images

+        # Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
+        if total_height > capture_height:
+            stitched = stitched.crop((0, 0, max_width, capture_height))
+
        # Draw caption only if page was trimmed
        if original_page_height > capture_height:
            draw = ImageDraw.Draw(stitched)
@@ -3,13 +3,13 @@ import time

 from loguru import logger
 from changedetectionio.content_fetchers.base import Fetcher
-from changedetectionio.content_fetchers.exceptions import Non200ErrorCodeReceived
-from changedetectionio.pluggy_interface import hookimpl


 class fetcher(Fetcher):
-    fetcher_description = "Selenium WebDriver Chrome"
-    requires_connection_url = True
+    if os.getenv("WEBDRIVER_URL"):
+        fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\""
+    else:
+        fetcher_description = "WebDriver Chrome/Javascript"

    proxy = None
    proxy_url = None
@@ -19,21 +19,26 @@ class fetcher(Fetcher):
    supports_screenshots = True
    supports_xpath_element_data = True

-    status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}
+    @classmethod
+    def get_status_icon_data(cls):
+        """Return Chrome browser icon data for WebDriver fetcher.
+
+        Returns:
+            dict with the icon's ``filename`` and its ``alt`` and ``title`` text.
+            A new dict is built on every call.
+        """
+        return {
+            'filename': 'google-chrome-icon.png',
+            'alt': 'Using a Chrome browser',
+            'title': 'Using a Chrome browser'
+        }

    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
        super().__init__(**kwargs)
        from urllib.parse import urlparse
        from selenium.webdriver.common.proxy import Proxy

-        if custom_browser_connection_url:
+        # .strip('"') will save someone a lot of time when they accidentally wrap the env value in double quotes
+        if not custom_browser_connection_url:
+            self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
+        else:
            self.browser_connection_is_custom = True
            self.browser_connection_url = custom_browser_connection_url
-        else:
-            from loguru import logger
-            logger.critical("Selenium WebDriver fetcher has no browser_connection_url — browser profile was not configured. "
-                            "Set WEBDRIVER_URL or configure a browser profile in Settings.")
-            self.browser_connection_url = None

        ##### PROXY SETUP #####

@@ -99,15 +104,17 @@ class fetcher(Fetcher):

            from selenium.webdriver.remote.remote_connection import RemoteConnection
            from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
+            from selenium.webdriver.remote.client_config import ClientConfig
+            from urllib3.util import Timeout
            driver = None
            try:
-                # Create the RemoteConnection and set timeout (e.g., 30 seconds)
-                remote_connection = RemoteConnection(
-                    self.browser_connection_url,
+                connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
+                client_config = ClientConfig(
+                    remote_server_addr=self.browser_connection_url,
+                    timeout=Timeout(connect=connection_timeout, total=connection_timeout)
                )
-                remote_connection.set_timeout(30)  # seconds
+                remote_connection = RemoteConnection(client_config=client_config)

-                # Now create the driver with the RemoteConnection
                driver = RemoteWebDriver(
                    command_executor=remote_connection,
                    options=options
@@ -125,28 +132,22 @@ class fetcher(Fetcher):
                if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
                    driver.set_window_size(1280, 1024)

-                driver.implicitly_wait(self.extra_delay)
+                driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

                if self.webdriver_js_execute_code is not None:
                    driver.execute_script(self.webdriver_js_execute_code)
                    # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
-                    driver.implicitly_wait(self.extra_delay)
+                    driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+
+                # @todo - how to check this? is it possible?
+                self.status_code = 200
+                # @todo somehow we should try to get this working for WebDriver
+                # raise EmptyReply(url=url, status_code=r.status_code)

                # @todo - dom wait loaded?
                import time
-                time.sleep(self.extra_delay + self.render_extract_delay)
+                time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
                self.content = driver.page_source
-
-                # Use Navigation Timing API to get the real HTTP status code (Chrome 102+)
-                # Read after the sleep so the page is fully settled
-                try:
-                    nav_status = driver.execute_script(
-                        "return window.performance.getEntriesByType('navigation')[0]?.responseStatus"
-                    )
-                    # Guard against 0 (file://, blocked requests) which should not raise Non200
-                    self.status_code = int(nav_status) if nav_status and int(nav_status) > 0 else 200
-                except Exception:
-                    self.status_code = 200
                self.headers = {}

                # Selenium always captures as PNG, convert to JPEG if needed
@@ -176,10 +177,6 @@ class fetcher(Fetcher):
                    img.close()
                else:
                    self.screenshot = screenshot_png
-
-                if self.status_code != 200 and not ignore_status_codes:
-                    raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=self.screenshot, page_html=self.content)
-
            except Exception as e:
                driver.quit()
                raise e
@@ -195,10 +192,9 @@ class fetcher(Fetcher):
 class WebDriverSeleniumFetcherPlugin:
    """Plugin class that registers the WebDriver Selenium fetcher as a built-in plugin."""

-    @hookimpl
    def register_content_fetcher(self):
        """Register the WebDriver Selenium fetcher"""
-        return ('selenium', fetcher)
+        return ('html_webdriver', fetcher)


 # Create module-level instance for plugin registration
@@ -45,8 +45,38 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
 # Compiled regex patterns for performance
 WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')

+# Regexes built from the constants above — no brittle hardcoded strings
+# Each pattern is an alternation of two marker pairs with one lazy capture group
+# per alternative, so a match fills exactly one group and leaves the other None.
+# No re.DOTALL flag is passed, so '.' does not match a newline: a fragment is
+# only captured when its open and closed markers sit on the same line.
+
+# Matches text wrapped in the REMOVED markers or in the CHANGED markers.
+_EXTRACT_REMOVED_RE = re.compile(
+    re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
+    + r'|' +
+    re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
+)
+# Matches text wrapped in the ADDED markers or in the CHANGED_INTO markers.
+_EXTRACT_ADDED_RE = re.compile(
+    re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
+    + r'|' +
+    re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
+)

-def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
+
+def extract_changed_from(raw_diff: str) -> str:
+    """Extract only the removed/changed-from fragments from a raw diff string.
+
+    Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
+    not the full surrounding line. Multiple fragments joined with newlines.
+
+    Args:
+        raw_diff: diff text containing the removed/changed placemarker pairs.
+
+    Returns:
+        The captured fragments in order of appearance, joined with ``'\\n'``;
+        an empty string when nothing matches.
+    """
+    # Each match has two capture groups (one per alternative); take the first
+    # that is not None, which is the one that actually matched.
+    return '\n'.join(next((g for g in m.groups() if g is not None), '') for m in _EXTRACT_REMOVED_RE.finditer(raw_diff))
+
+
+def extract_changed_to(raw_diff: str) -> str:
+    """Extract only the added/changed-into fragments from a raw diff string.
+
+    Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
+    not the full surrounding line. Multiple fragments joined with newlines.
+
+    Args:
+        raw_diff: diff text containing the added/changed-into placemarker pairs.
+
+    Returns:
+        The captured fragments in order of appearance, joined with ``'\\n'``;
+        an empty string when nothing matches.
+    """
+    # Each match has two capture groups (one per alternative); take the first
+    # that is not None, which is the one that actually matched.
+    return '\n'.join(next((g for g in m.groups() if g is not None), '') for m in _EXTRACT_ADDED_RE.finditer(raw_diff))
+
+
+def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html', include_change_type_prefix: bool = True) -> tuple[str, bool]:
    """
    Render word-level differences between two lines inline using diff-match-patch library.

@@ -133,14 +163,20 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
        if removed_tokens:
            removed_full = ''.join(removed_tokens).rstrip()
            trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
-            result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
+            if include_change_type_prefix:
+                result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
+            else:
+                result_parts.append(f'{removed_full}{trailing_removed}')

        if added_tokens:
            if result_parts:  # Add newline between removed and added
                result_parts.append('\n')
            added_full = ''.join(added_tokens).rstrip()
            trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
-            result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
+            if include_change_type_prefix:
+                result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
+            else:
+                result_parts.append(f'{added_full}{trailing_added}')

        return ''.join(result_parts), has_changes
    else:
@@ -150,21 +186,27 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
            if op == 0:  # Equal
                result_parts.append(text)
            elif op == 1:  # Insertion
-                # Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
-                content = text.rstrip()
-                trailing = text[len(content):] if len(text) > len(content) else ''
-                if content:
-                    result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
+                if not include_change_type_prefix:
+                    result_parts.append(text)
                else:
-                    result_parts.append(trailing)
+                    # Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
+                    content = text.rstrip()
+                    trailing = text[len(content):] if len(text) > len(content) else ''
+                    if content:
+                        result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
+                    else:
+                        result_parts.append(trailing)
            elif op == -1:  # Deletion
-                # Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
-                content = text.rstrip()
-                trailing = text[len(content):] if len(text) > len(content) else ''
-                if content:
-                    result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
+                if not include_change_type_prefix:
+                    result_parts.append(text)
                else:
-                    result_parts.append(trailing)
+                    # Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
+                    content = text.rstrip()
+                    trailing = text[len(content):] if len(text) > len(content) else ''
+                    if content:
+                        result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
+                    else:
+                        result_parts.append(trailing)

        return ''.join(result_parts), has_changes

@@ -360,7 +402,7 @@ def customSequenceMatcher(

            # Use inline word-level diff for single line replacements when word_diff is enabled
            if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
-                inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
+                inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer, include_change_type_prefix=include_change_type_prefix)
                # Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
                if ignore_junk and not has_changes:
                    # No real changes, skip this line
@@ -415,8 +457,8 @@ def render_diff(
    Returns:
        str: Rendered difference
    """
-    newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
-    previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
+    newest_lines = [line.rstrip() for line in (newest_version_file_contents or '').splitlines()]
+    previous_lines = [line.rstrip() for line in (previous_version_file_contents or '').splitlines()]
    now = time.time()
    logger.debug(
        f"diff options: "
@@ -212,6 +212,11 @@ def _is_safe_valid_url(test_url):
    from .validate_url import is_safe_valid_url
    return is_safe_valid_url(test_url)

+@app.template_global('get_html_head_extras')
+def _get_html_head_extras():
+    """Template global ``get_html_head_extras``: returns the result of ``collect_html_head_extras()``."""
+    # Imported at call time rather than at module import.
+    from .pluggy_interface import collect_html_head_extras
+    head_extras = collect_html_head_extras()
+    return head_extras
+

@app.template_filter('format_number_locale')
 def _jinja2_filter_format_number_locale(value: float) -> str:
@@ -341,36 +346,52 @@ def _jinja2_filter_format_duration(seconds):

@app.template_filter('fetcher_status_icons')
 def _jinja2_filter_fetcher_status_icons(fetcher_name):
-    """Return status icon HTML for a fetcher, or empty string if none.
+    """Get status icon HTML for a given fetcher.

-    Built-in fetchers declare their icon via the ``status_icon`` class attribute
-    on their ``Fetcher`` subclass.  Plugin fetchers may still use the pluggy
-    ``collect_fetcher_status_icons`` hook as a fallback.
+    This filter checks both built-in fetchers and plugin fetchers for status icons.
+
+    Args:
+        fetcher_name: The fetcher name (e.g., 'html_webdriver', 'html_js_zyte')
+
+    Returns:
+        Markup | str: an <img class="status-icon"> element, or '' when no icon data is found
     """
    from changedetectionio import content_fetchers
+    from changedetectionio.pluggy_interface import collect_fetcher_status_icons
    from markupsafe import Markup
    from flask import url_for

    icon_data = None

-    fetcher_class = content_fetchers.get_fetcher(fetcher_name)
-    if fetcher_class is not None:
-        icon_data = getattr(fetcher_class, 'status_icon', None)
-        if not icon_data and callable(getattr(fetcher_class, 'get_status_icon_data', None)):
+    # First check if it's a plugin fetcher (plugins have priority)
+    plugin_icon_data = collect_fetcher_status_icons(fetcher_name)
+    if plugin_icon_data:
+        icon_data = plugin_icon_data
+    # Check if it's a built-in fetcher
+    elif hasattr(content_fetchers, fetcher_name):
+        fetcher_class = getattr(content_fetchers, fetcher_name)
+        if hasattr(fetcher_class, 'get_status_icon_data'):
            icon_data = fetcher_class.get_status_icon_data()

-    # Fallback: pluggy hook for plugins that implement fetcher_status_icon
-    if not icon_data:
-        from changedetectionio.pluggy_interface import collect_fetcher_status_icons
-        icon_data = collect_fetcher_status_icons(fetcher_name)
+    # Build HTML from icon data
+    if icon_data and isinstance(icon_data, dict):
+        # Use 'group' from icon_data if specified, otherwise default to 'images'
+        group = icon_data.get('group', 'images')

-    if not icon_data:
-        return ''
+        # Try to use url_for, but fall back to manual URL building if endpoint not registered yet
+        try:
+            icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
+        except Exception:
+            # Catch Exception rather than using a bare except, so KeyboardInterrupt and
+            # SystemExit still propagate.
+            # Fallback: build URL manually respecting APPLICATION_ROOT
+            from flask import request
+            app_root = request.script_root if hasattr(request, 'script_root') else ''
+            icon_url = f"{app_root}/static/{group}/{icon_data['filename']}"

-    group = icon_data.get('group', 'images')
-    icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
-    style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
-    return Markup(f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>')
+        style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
+        html = f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>'
+        return Markup(html)
+
+    return ''

 _RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')

@@ -393,7 +414,7 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
    return sanitized if sanitized else 'tag'

 # Import login_optionally_required from auth_decorator
-from changedetectionio.auth_decorator import login_optionally_required
+from changedetectionio.auth_decorator import SHARED_DIFF_READ_ONLY_ENDPOINTS, login_optionally_required

 # When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object.
 class User(flask_login.UserMixin):
@@ -501,6 +522,11 @@ def changedetection_app(config=None, datastore_o=None):
        available_languages=available_languages
    )

+    @app.context_processor
+    def inject_llm_features_disabled():
+        """Context processor: adds ``llm_features_disabled`` to the template context."""
+        from changedetectionio.llm.evaluator import is_llm_features_disabled
+        return {'llm_features_disabled': is_llm_features_disabled()}
+
    # Set up a request hook to check authentication for all routes
    @app.before_request
    def check_authentication():
@@ -520,7 +546,7 @@ def changedetection_app(config=None, datastore_o=None):
            # Permitted
            elif request.endpoint and 'login' in request.endpoint:
                return None
-            elif request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
+            elif request.endpoint in SHARED_DIFF_READ_ONLY_ENDPOINTS and datastore.data['settings']['application'].get('shared_diff_access'):
                return None
            elif request.method in flask_login.config.EXEMPT_METHODS:
                return None
@@ -960,6 +986,11 @@ def changedetection_app(config=None, datastore_o=None):
                    "queued_data": all_queued
                })

+    if strtobool(os.getenv('HISTORY_SNAPSHOT_FILE_ALLOW_OUTSIDE_WATCH_DATADIR', 'False')):
+        logger.warning("SECURITY WARNING: HISTORY_SNAPSHOT_FILE_ALLOW_OUTSIDE_WATCH_DATADIR is enabled — "
+                       "snapshot reads are NOT confined to the watch data directory. "
+                       "This disables protection against path traversal via restored backups (GHSA-8757-69j2-hx56).")
+
    # Start the async workers during app initialization
    # Can be overridden by ENV or use the default settings
    n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
@@ -5,6 +5,8 @@ from wtforms.widgets.core import TimeInput
 from flask_babel import lazy_gettext as _l, gettext

 from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
+from changedetectionio.llm.ui_strings import LLM_INTENT_WATCH_PLACEHOLDER
+from changedetectionio.llm.evaluator import DEFAULT_CHANGE_SUMMARY_PROMPT, LLM_DEFAULT_MAX_SUMMARY_TOKENS, LLM_DEFAULT_THINKING_BUDGET
 from changedetectionio.conditions.form import ConditionFormRow
 from changedetectionio.notification_service import NotificationContextData
 from changedetectionio.strtobool import strtobool
@@ -15,7 +17,9 @@ from wtforms import (
    Form,
    Field,
    FloatField,
+    HiddenField,
    IntegerField,
+    PasswordField,
    RadioField,
    SelectField,
    StringField,
@@ -276,12 +280,44 @@ class TimeBetweenCheckForm(Form):
        return True


+class LabelAfterInputTableWidget(widgets.TableWidget):
+    """
+    TableWidget variant that puts the input cell first and the label cell second.
+
+    Each visible subfield renders as <tr><td>input</td><th>label</th></tr>, the reverse of
+    the <th>label</th><td>input</td> order produced by WTForms' stock TableWidget.
+    """
+
+    def __call__(self, field, **kwargs):
+        from markupsafe import Markup
+        from wtforms.widgets import html_params
+
+        parts = []
+        if self.with_table_tag:
+            kwargs.setdefault("id", field.id)
+            parts.append(f"<table {html_params(**kwargs)}>")
+
+        # Hidden/CSRF inputs get no row of their own: they are buffered and emitted
+        # inside the input cell of the next visible subfield.
+        pending_hidden = ""
+        for subfield in field:
+            if subfield.type not in ("HiddenField", "CSRFTokenField"):
+                parts.append(
+                    f"<tr><td>{pending_hidden}{subfield}</td><th>{subfield.label}</th></tr>"
+                )
+                pending_hidden = ""
+                continue
+            pending_hidden += str(subfield)
+
+        if self.with_table_tag:
+            parts.append("</table>")
+        # Hidden inputs left over after the last visible row are appended after the table.
+        if pending_hidden:
+            parts.append(pending_hidden)
+        return Markup("".join(parts))
+
+
 class EnhancedFormField(FormField):
    """
    An enhanced FormField that supports conditional validation with top-level error messages.
    Adds a 'top_errors' property for validation errors at the FormField level.
    """

+    widget = LabelAfterInputTableWidget()
+
    def __init__(self, form_class, label=None, validators=None, separator="-",
                 conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs):
        """
@@ -548,6 +584,17 @@ def validate_url(test_url):
        raise ValidationError('Watch protocol is not permitted or invalid URL format')


+class validateLLMApiBaseSafe(object):
+    """WTForms validator guarding the LLM ``api_base`` field against SSRF.
+
+    Blocks private/loopback/reserved api_base values unless the operator has
+    opted in via ALLOW_IANA_RESTRICTED_ADDRESSES=true.
+    """
+
+    def __call__(self, form, field):
+        from changedetectionio.validate_url import is_llm_api_base_safe
+        is_safe, rejection_reason = is_llm_api_base_safe(field.data)
+        if is_safe:
+            return
+        # The helper's reason string becomes the validation error message.
+        raise ValidationError(rejection_reason)
+
+
 class ValidateSinglePythonRegexString(object):
    def __init__(self, message=None):
        self.message = message
@@ -615,8 +662,8 @@ class ValidateCSSJSONXPATHInput(object):
                try:
                    elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
                except elementpath.ElementPathError as e:
-                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
-                    raise ValidationError(message % (line, str(e)))
+                    message = field.gettext('\'%(expression)s\' is not a valid XPath expression. (%(error)s)')
+                    raise ValidationError(message % {'expression': line, 'error': str(e)})
                except:
                    raise ValidationError("A system-error occurred when validating your XPath expression")

@@ -630,8 +677,8 @@ class ValidateCSSJSONXPATHInput(object):
                try:
                    tree.xpath(line.strip())
                except etree.XPathEvalError as e:
-                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
-                    raise ValidationError(message % (line, str(e)))
+                    message = field.gettext('\'%(expression)s\' is not a valid XPath expression. (%(error)s)')
+                    raise ValidationError(message % {'expression': line, 'error': str(e)})
                except:
                    raise ValidationError("A system-error occurred when validating your XPath expression")

@@ -650,8 +697,8 @@ class ValidateCSSJSONXPATHInput(object):
                try:
                    parse(input)
                except (JsonPathParserError, JsonPathLexerError) as e:
-                    message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
-                    raise ValidationError(message % (input, str(e)))
+                    message = field.gettext('\'%(expression)s\' is not a valid JSONPath expression. (%(error)s)')
+                    raise ValidationError(message % {'expression': input, 'error': str(e)})
                except:
                    raise ValidationError("A system-error occurred when validating your JSONPath expression")

@@ -674,8 +721,8 @@ class ValidateCSSJSONXPATHInput(object):
                    validate_jq_expression(input)
                    jq.compile(input)
                except (ValueError) as e:
-                    message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
-                    raise ValidationError(message % (input, str(e)))
+                    message = field.gettext('\'%(expression)s\' is not a valid jq expression. (%(error)s)')
+                    raise ValidationError(message % {'expression': input, 'error': str(e)})
                except:
                    raise ValidationError("A system-error occurred when validating your jq expression")

@@ -725,7 +772,7 @@ class ValidateStartsWithRegex(object):
                raise ValidationError(self.message or _l("Invalid value."))

 class quickWatchForm(Form):
-    url = fields.URLField(_l('URL'), validators=[validateURL()])
+    url = StringField('URL', validators=[validateURL()])
    tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
    watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
    processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
@@ -742,6 +789,7 @@ class commonSettingsForm(Form):
        self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
        self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})

+    fetch_backend = RadioField(_l('Fetch Method'), choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    notification_body = TextAreaField(_l('Notification Body'), default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
    notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
    notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
@@ -770,17 +818,16 @@ class SingleBrowserStep(Form):
    operation = SelectField(_l('Operation'), [validators.Optional()], choices=browser_step_ui_config.keys())

    # maybe better to set some <script>var..
-    selector = StringField(_l('Selector'), [validators.Optional()], render_kw={"placeholder": "CSS or xPath selector"})
-    optional_value = StringField(_l('value'), [validators.Optional()], render_kw={"placeholder": "Value"})
+    selector = StringField(_l('Selector'), [validators.Optional()], render_kw={"placeholder": _l("CSS or xPath selector")})
+    optional_value = StringField(_l('value'), [validators.Optional()], render_kw={"placeholder": _l("Value")})
 #   @todo move to JS? ajax fetch new field?
 #    remove_button = SubmitField(_l('-'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
 #    add_button = SubmitField(_l('+'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})

 class processor_text_json_diff_form(commonSettingsForm):

-    browser_profile = RadioField(_l('Browser / Fetch method'), choices=[])  # populated at runtime in edit.py
-    url = fields.URLField('Web Page URL', validators=[validateURL()])
-    tags = StringTagUUID('Group Tag', [validators.Optional()], default='')
+    url = StringField(_l('Web Page URL'), validators=[validateURL()])
+    tags = StringTagUUID(_l('Group Tag'), [validators.Optional()], default='')

    time_between_check = EnhancedFormField(
        TimeBetweenCheckForm,
@@ -794,10 +841,18 @@ class processor_text_json_diff_form(commonSettingsForm):

    time_between_check_use_default = BooleanField(_l('Use global settings for time between check and scheduler.'), default=False)

+    llm_intent = TextAreaField(_l('AI Change Intent'), validators=[validators.Optional(), validators.Length(max=2000)],
+                               render_kw={"rows": "5", "placeholder": LLM_INTENT_WATCH_PLACEHOLDER})
+
+    llm_change_summary = TextAreaField(_l('AI Change Summary'), validators=[validators.Optional(), validators.Length(max=2000)],
+                               render_kw={"rows": "5", "placeholder": DEFAULT_CHANGE_SUMMARY_PROMPT},
+                               default='')
+
    include_filters = StringListField(_l('CSS/JSONPath/JQ/XPath Filters'), [ValidateCSSJSONXPATHInput()], default='')

    subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])

+    extract_lines_containing = StringListField(_l('Extract lines containing'), [validators.Optional()])
    extract_text = StringListField(_l('Extract text'), [ValidateListRegex()])

    title = StringField(_l('Title'), default='')
@@ -917,7 +972,7 @@ class processor_text_json_diff_form(commonSettingsForm):

 class SingleExtraProxy(Form):
    # maybe better to set some <script>var..
-    proxy_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
+    proxy_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": _l("Name")})
    proxy_url = StringField(_l('Proxy URL'), [
        validators.Optional(),
        ValidateStartsWithRegex(
@@ -929,7 +984,7 @@ class SingleExtraProxy(Form):
    ], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})

 class SingleExtraBrowser(Form):
-    browser_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
+    browser_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": _l("Name")})
    browser_connection_url = StringField(_l('Browser connection URL'), [
        validators.Optional(),
        ValidateStartsWithRegex(
@@ -940,66 +995,10 @@ class SingleExtraBrowser(Form):
        ValidateSimpleURL()
    ], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})

-
-class BrowserProfileForm(Form):
-    """Create or edit a named BrowserProfile stored in settings.application.browser_profiles."""
-
-    name = StringField(
-        _l('Profile name'),
-        [validators.DataRequired(), validators.Length(max=100)],
-        render_kw={"placeholder": _l("e.g. Mobile Chrome, Bright Data CDP"), "maxlength": "100"}
-    )
-    fetch_backend = SelectField(
-        _l('Fetch method'),
-        choices=[],  # populated at runtime from available_fetchers()
-    )
-    browser_connection_url = StringField(
-        _l('Browser connection URL'),
-        [
-            validators.Optional(),
-            ValidateStartsWithRegex(
-                regex=r'^(wss?|ws|http|https)://',
-                flags=re.IGNORECASE,
-                message=_l('Browser connection URL must start with ws://, wss://, http://, https://')
-            ),
-            ValidateSimpleURL(),
-        ],
-        render_kw={"placeholder": "ws://my-chrome:3000", "size": 50}
-    )
-    viewport_width = IntegerField(
-        _l('Viewport width (px)'),
-        [validators.Optional(), validators.NumberRange(min=100, max=7680)],
-        default=1280,
-        render_kw={"style": "width:5em;"}
-    )
-    viewport_height = IntegerField(
-        _l('Viewport height (px)'),
-        [validators.Optional(), validators.NumberRange(min=100, max=4320)],
-        default=1000,
-        render_kw={"style": "width:5em;"}
-    )
-    block_images = BooleanField(_l('Block images (faster loads)'), default=False)
-    block_fonts = BooleanField(_l('Block web fonts'), default=False)
-    ignore_https_errors = BooleanField(_l('Ignore HTTPS/TLS errors'), default=False)
-    user_agent = StringField(
-        _l('User-Agent override'),
-        [validators.Optional(), validators.Length(max=500)],
-        render_kw={"placeholder": _l("Leave blank to use fetcher default"), "size": 60}
-    )
-    locale = StringField(
-        _l('Locale'),
-        [validators.Optional(), validators.Length(max=20)],
-        render_kw={"placeholder": "en-US, de-DE, fr-FR …", "size": 15}
-    )
-    custom_headers = TextAreaField(
-        _l('Custom headers'),
-        [validators.Optional()],
-        render_kw={
-            "placeholder": "Header-Name: value\nAnother-Header: value",
-            "rows": 4, "cols": 60,
-            "style": "font-family:monospace;"
-        }
-    )
+class DefaultUAInputForm(Form):
+    """Sub-form holding optional User-Agent override inputs, one per fetch method.
+
+    Every input is optional and shows "<default>" as its placeholder.
+    """
+    html_requests = StringField(_l('Plaintext requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
+    # Evaluated once, when this class body runs: the 'Chrome requests' field only exists
+    # if PLAYWRIGHT_DRIVER_URL or WEBDRIVER_URL is set in the environment at that moment.
+    if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"):
+        html_webdriver = StringField(_l('Chrome requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})

 # datastore.data['settings']['requests']..
 class globalSettingsRequestForm(Form):
@@ -1023,6 +1022,8 @@ class globalSettingsRequestForm(Form):
    extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
    extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)

+    default_ua = FormField(DefaultUAInputForm, label=_l("Default User-Agent overrides"))
+
    def validate_extra_proxies(self, extra_validators=None):
        for e in self.data['extra_proxies']:
            if e.get('proxy_name') or e.get('proxy_url'):
@@ -1042,16 +1043,17 @@ class globalSettingsApplicationForm(commonSettingsForm):
    api_access_token_enabled = BooleanField(_l('API access token security check enabled'), default=True, validators=[validators.Optional()])
    base_url = StringField(_l('Notification base URL override'),
                           validators=[validators.Optional()],
-                           render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
+                           render_kw={"placeholder": os.getenv('BASE_URL', _l('Not set'))}
                           )
    empty_pages_are_a_change =  BooleanField(_l('Treat empty pages as a change?'), default=False)
+    fetch_backend = RadioField(_l('Fetch Method'), default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    global_ignore_text = StringListField(_l('Ignore Text'), [ValidateListRegex()])
    global_subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
    ignore_whitespace = BooleanField(_l('Ignore whitespace'))

    # Screenshot comparison settings
    min_change_percentage = FloatField(
-        'Screenshot: Minimum Change Percentage',
+        _l('Screenshot: Minimum Change Percentage'),
        validators=[
            validators.Optional(),
            validators.NumberRange(min=0.0, max=100.0, message=_l('Must be between 0 and 100'))
@@ -1091,6 +1093,149 @@ class globalSettingsApplicationForm(commonSettingsForm):
    ui = FormField(globalSettingsApplicationUIForm)


+class globalSettingsLLMForm(Form):
+    """
+    LLM / AI provider settings — stored under datastore['settings']['application']['llm'].
+
+    Uses litellm under the hood, so the model string encodes both the provider and model.
+    No separate provider dropdown needed — litellm routes automatically:
+      gpt-4o-mini                           → OpenAI
+      claude-3-5-haiku-20251001             → Anthropic
+      ollama/llama3.2                       → Ollama
+      openrouter/google/gemma-3-12b-it:free → OpenRouter (free tier)
+      gemini/gemini-2.0-flash               → Google Gemini
+      azure/gpt-4o                          → Azure OpenAI
+    """
+    # litellm model string (see the examples in the class docstring); optional.
+    model = StringField(
+        _l('Model'),
+        validators=[validators.Optional()],
+        render_kw={"placeholder": "gpt-4o-mini", "style": "width: 24em;"},
+    )
+    # Rendered as a password input with browser autocomplete turned off.
+    api_key = PasswordField(
+        _l('API Key'),
+        validators=[validators.Optional()],
+        render_kw={
+            "autocomplete": "off",
+            "style": "width: 24em;",
+        },
+    )
+    # Optional; a supplied value must also pass validateLLMApiBaseSafe.
+    api_base = StringField(
+        _l('API Base URL'),
+        validators=[validators.Optional(), validateLLMApiBaseSafe()],
+        render_kw={
+            "placeholder": "http://localhost:11434  (Ollama / custom endpoints only)",
+            "style": "width: 24em;",
+        },
+    )
+    # Persisted by the Provider dropdown JS — lets the backend distinguish a self-hosted
+    # OpenAI-compatible endpoint (vLLM, LM Studio, llama.cpp) from cloud OpenAI, so we can
+    # apply reasoning-friendly token caps only when the user opted in.
+    provider_kind = HiddenField(
+        validators=[validators.Optional()],
+        default='',
+    )
+    # Multiplier applied to LLM max_tokens caps when provider_kind is 'ollama' or
+    # 'openai_compatible' — endpoints that commonly serve reasoning models (Qwen3,
+    # DeepSeek-R1, Gemma 3, etc.) which emit chain-of-thought into
+    # message.reasoning_content before the final answer lands in message.content.
+    # Cloud providers with non-reasoning defaults (OpenAI, Anthropic, Gemini,
+    # OpenRouter) stay on the original tight caps so existing users see no
+    # behavior or cost change. Users on paid Ollama / openai_compatible endpoints
+    # who care about cost can dial this down to 1x.
+    local_token_multiplier = IntegerField(
+        _l('Token multiplier for local reasoning models'),
+        validators=[validators.Optional(), validators.NumberRange(min=1, max=20)],
+        default=5,
+        render_kw={"placeholder": "5", "style": "width: 6em;"},
+    )
+    # Free-text prompt capped at 2000 characters; DEFAULT_CHANGE_SUMMARY_PROMPT is shown
+    # as the placeholder while the stored default stays ''.
+    change_summary_default = TextAreaField(
+        _l('Default AI Change Summary prompt'),
+        validators=[validators.Optional(), validators.Length(max=2000)],
+        render_kw={
+            "rows": "5",
+            "placeholder": DEFAULT_CHANGE_SUMMARY_PROMPT,
+            "style": "width: 100%; ",
+        },
+        default='',
+    )
+    # Non-negative integer; per the placeholder text, 0 means unlimited.
+    max_tokens_per_count_period = IntegerField(
+        _l('Max tokens per watch per period'),
+        validators=[validators.Optional(), validators.NumberRange(min=0)],
+        default=0,
+        render_kw={
+            "placeholder": "0 = unlimited",
+            "style": "width: 8em;",
+        },
+    )
+    # Non-negative integer, default 0.
+    # NOTE(review): presumably 0 means "no budget", as for the field above — confirm.
+    token_budget_month = IntegerField(
+        _l('Monthly token budget'),
+        validators=[validators.Optional(), validators.NumberRange(min=0)],
+        default=0,
+        render_kw={"style": "width: 10em;"},
+    )
+    # Must be at least 1 when supplied; defaults to 100000.
+    max_input_chars = IntegerField(
+        _l('Max input characters'),
+        validators=[validators.Optional(), validators.NumberRange(min=1)],
+        default=100000,
+        render_kw={
+            "placeholder": "100000",
+            "style": "width: 10em;",
+        },
+    )
+    # Master on/off switch for ALL LLM lookups at runtime. When False, every entry point
+    # in evaluator.py (and the restock fallback) short-circuits with a logger.debug
+    # message — even if a provider+model is still configured. Saved config and the
+    # "configured" badge remain visible so the user can toggle back on without re-entering.
+    enabled = BooleanField(
+        _l('Enable AI / LLM features'),
+        default=True,
+    )
+    override_diff_with_summary = BooleanField(
+        _l('Replace {{diff}} notification token with AI summary'),
+        default=True,
+    )
+    restock_use_fallback_extract = BooleanField(
+        _l('Use LLM as a fallback for extracting price and restock info'),
+        default=True,
+    )
+    debug = BooleanField(
+        _l('Enable LLM debug logging'),
+        default=False,
+    )
+    # Choice values are strings, so the default constant is passed through str() to match.
+    thinking_budget = SelectField(
+        _l('AI thinking budget (tokens)'),
+        choices=[
+            ('0',    _l('Off (no thinking)')),
+            ('100',  '100'),
+            ('500',  '500'),
+            ('2000', '2000'),
+        ],
+        default=str(LLM_DEFAULT_THINKING_BUDGET),
+        validators=[validators.Optional()],
+    )
+    # Same string-valued choice scheme as thinking_budget above.
+    max_summary_tokens = SelectField(
+        _l('Max AI summary length (tokens)'),
+        choices=[
+            ('500',   '500'),
+            ('1000',  '1000'),
+            ('3000',  '3000'),
+            ('5000',  '5000'),
+            ('10000', '10000'),
+            ('15000', '15000'),
+        ],
+        default=str(LLM_DEFAULT_MAX_SUMMARY_TOKENS),
+        validators=[validators.Optional()],
+    )
+    # Policy applied once the monthly token budget is reached; defaults to skipping only
+    # the AI summarisation.
+    budget_action = RadioField(
+        _l('When monthly token budget is reached'),
+        choices=[
+            ('skip_llm',   _l('Skip AI summarisation only (watch still checks)')),
+            ('skip_check', _l('Skip the watch check entirely')),
+        ],
+        default='skip_llm',
+    )
+
+
 class globalSettingsForm(Form):
    # Define these as FormFields/"sub forms", this way it matches the JSON storage
    # datastore.data['settings']['application']..
@@ -1103,6 +1248,7 @@ class globalSettingsForm(Form):

    requests = FormField(globalSettingsRequestForm)
    application = FormField(globalSettingsApplicationForm)
+    llm = FormField(globalSettingsLLMForm)
    save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})


@@ -282,7 +282,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
    try:
        if is_xml:
            # So that we can keep CDATA for cdata_in_document_to_text() to process
-            parser = etree.XMLParser(strip_cdata=False)
+            parser = etree.XMLParser(strip_cdata=False, resolve_entities=False, no_network=True)
            # For XML/RSS content, use etree.fromstring to properly handle XML declarations
            tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
        else:
@@ -346,7 +346,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
    try:
        if is_xml:
            # So that we can keep CDATA for cdata_in_document_to_text() to process
-            parser = etree.XMLParser(strip_cdata=False)
+            parser = etree.XMLParser(strip_cdata=False, resolve_entities=False, no_network=True)
            # For XML/RSS content, use etree.fromstring to properly handle XML declarations
            tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
        else:
@@ -757,16 +757,41 @@ def get_triggered_text(content, trigger_text):


 def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None:
+    """Extract the <title> from an HTML document.
+
+    Rather than decoding/scanning a fixed prefix of the whole document, we first
+    locate the raw ``<title`` marker and then decode only a small window around
+    it.  This handles pages (e.g. Amazon) where large ``<head>`` sections push
+    the title tag well past the old 8 192-character scan limit.
+    """
+    # Maximum bytes/chars to extract after (and including) the opening <title tag.
+    # The regex needs to see </title>, so the window must cover the full content.
+    # The return value is always capped at 2 000 chars; titles beyond that are
+    # rare but possible.  We read up to 128 KiB from the tag onwards to handle
+    # even pathological cases without scanning the whole document.
+    _TITLE_WINDOW = 131072
+
    try:
-        # Only decode/process the prefix we need for title extraction
        match data:
-            case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")):
-                prefix = data[:scan_chars * 2].decode("utf-16", errors="replace")
            case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")):
-                prefix = data[:scan_chars * 4].decode("utf-32", errors="replace")
+                # UTF-32: locate the tag in the raw bytes, then decode the window.
+                tag_pos = data.lower().find(b"<\x00\x00\x00t\x00\x00\x00")
+                if tag_pos == -1:
+                    return None
+                chunk = data[tag_pos: tag_pos + _TITLE_WINDOW * 4].decode("utf-32", errors="replace")
+                prefix = chunk
+            case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")):
+                # UTF-16: simple byte-pair search is tricky; fall back to decoding
+                # a reasonable head chunk and let the regex do the rest.
+                prefix = data[: max(scan_chars * 2, _TITLE_WINDOW)].decode("utf-16", errors="replace")
            case bytes():
+                # UTF-8 / legacy 8-bit: find the tag cheaply in raw bytes.
+                tag_pos = data.lower().find(b"<title")
+                if tag_pos == -1:
+                    return None
+                raw_chunk = data[tag_pos: tag_pos + _TITLE_WINDOW]
                try:
-                    prefix = data[:scan_chars].decode("utf-8")
+                    chunk = raw_chunk.decode("utf-8")
                except UnicodeDecodeError:
                    try:
                        head = data[:sniff_bytes].decode("ascii", errors="ignore")
@@ -774,23 +799,27 @@ def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int =
                            enc = m.group(1).lower()
                        else:
                            enc = "cp1252"
-                        prefix = data[:scan_chars * 2].decode(enc, errors="replace")
+                        chunk = raw_chunk.decode(enc, errors="replace")
                    except Exception as e:
                        logger.error(f"Title extraction encoding detection failed: {e}")
                        return None
+                prefix = chunk
            case str():
-                prefix = data[:scan_chars] if len(data) > scan_chars else data
+                tag_pos = data.lower().find("<title")
+                if tag_pos == -1:
+                    return None
+                prefix = data[tag_pos: tag_pos + _TITLE_WINDOW]
            case _:
                logger.error(f"Title extraction received unsupported data type: {type(data)}")
                return None

-        # Search only in the prefix
+        # Search only in the (now tag-anchored) prefix
        if m := TITLE_RE.search(prefix):
            title = html.unescape(" ".join(m.group(1).split())).strip()
            # Some safe limit
            return title[:2000]
        return None
-        
+
    except Exception as e:
        logger.error(f"Title extraction failed: {e}")
        return None
@@ -28,18 +28,20 @@ def get_timeago_locale(flask_locale):
        str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
    """
    locale_map = {
-        'zh': 'zh_CN',      # Chinese Simplified
+        'zh': 'zh_CN',          # Chinese Simplified
        # timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
-        'zh_TW': 'zh_TW',   # Chinese Traditional (timeago uses zh_TW)
+        'zh_TW': 'zh_TW',       # Chinese Traditional (timeago uses zh_TW)
        'zh_Hant_TW': 'zh_TW',  # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
-        'pt': 'pt_PT',      # Portuguese (Portugal)
-        'sv': 'sv_SE',      # Swedish
-        'no': 'nb_NO',      # Norwegian Bokmål
-        'hi': 'in_HI',      # Hindi
-        'cs': 'en',         # Czech not supported by timeago, fallback to English
-        'uk': 'uk',         # Ukrainian
-        'en_GB': 'en',      # British English - timeago uses 'en'
-        'en_US': 'en',      # American English - timeago uses 'en'
+        'pt': 'pt_PT',          # Portuguese (Portugal)
+        'pt_BR': 'pt_BR',       # Portuguese (Brasil)
+        'sv': 'sv_SE',          # Swedish
+        'no': 'nb_NO',          # Norwegian Bokmål
+        'hi': 'in_HI',          # Hindi
+        'cs': 'en',             # Czech not supported by timeago, fallback to English
+        'ja': 'ja',             # Japanese
+        'uk': 'uk',             # Ukrainian
+        'en_GB': 'en',          # British English - timeago uses 'en'
+        'en_US': 'en',          # American English - timeago uses 'en'
    }
    return locale_map.get(flask_locale, flask_locale)

@@ -53,7 +55,8 @@ LANGUAGE_DATA = {
    'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
    'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
    'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
-    'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
+    'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'},
+    'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
    'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
    'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
    'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
@@ -0,0 +1 @@
+# LLM intent-based change evaluation
@@ -0,0 +1,52 @@
+"""
+BM25-based relevance trimming for large snapshot text.
+
+When a snapshot is large and no CSS pre-filter has narrowed it down,
+we use BM25 to select the lines most relevant to the user's intent
+before sending to the LLM. This keeps the context focused without
+an arbitrary char truncation.
+
+Pure functions — no side effects, fully testable.
+"""
+
+MAX_CONTEXT_CHARS = 15_000
+
+
+def trim_to_relevant(text: str, query: str, max_chars: int = MAX_CONTEXT_CHARS) -> str:
+    """
+    Return the lines from `text` most relevant to `query`, up to `max_chars`.
+
+    - Empty `text` or `query`: returns `text` unchanged ('' when `text` is falsy).
+    - Text already within budget: returned unchanged.
+    - rank_bm25 not installed, or no non-blank lines: head-truncation to `max_chars`.
+    - Otherwise: non-blank lines are ranked with BM25 against the whitespace-tokenised,
+      lower-cased query; lines are taken best-first until the next one would overflow
+      the budget, then re-emitted in original document order joined by newlines.
+    - If even the best-ranked line does not fit on its own, that line is returned
+      truncated to `max_chars` rather than an empty string.
+    """
+    if not text or not query:
+        return text or ''
+
+    if len(text) <= max_chars:
+        return text
+
+    lines = [l for l in text.splitlines() if l.strip()]
+    if not lines:
+        return text[:max_chars]
+
+    try:
+        from rank_bm25 import BM25Okapi
+    except ImportError:
+        # rank-bm25 not installed — fall back to simple head truncation
+        return text[:max_chars]
+
+    tokenized = [line.lower().split() for line in lines]
+    bm25 = BM25Okapi(tokenized)
+    scores = bm25.get_scores(query.lower().split())
+
+    # (original_index, (score, line)) pairs, best score first. sorted() is stable,
+    # so equally-scored lines keep their document order.
+    ranked = sorted(enumerate(zip(scores, lines)), key=lambda x: x[1][0], reverse=True)
+
+    selected_indices, total = [], 0
+    for idx, (_score, line) in ranked:
+        # +1 accounts for the newline used when the lines are joined back together
+        if total + len(line) + 1 > max_chars:
+            break
+        selected_indices.append(idx)
+        total += len(line) + 1
+
+    if not selected_indices:
+        # The single most relevant line is longer than the whole budget. Returning ''
+        # here would throw away all context, so keep the head of that line instead.
+        return ranked[0][1][1][:max_chars]
+
+    # Re-order selected lines to preserve original document order
+    ordered = [lines[i] for i in sorted(selected_indices)]
+    return '\n'.join(ordered)
@@ -0,0 +1,162 @@
+"""
+Thin wrapper around litellm.completion.
+Keeps litellm import isolated so the rest of the codebase doesn't depend on it directly,
+and makes the call easy to mock in tests.
+"""
+
+import logging
+import os
+from loguru import logger
+
+# Default output token cap for JSON-returning calls (intent eval, preview, setup).
+# These return small JSON objects — 400 is enough for a verbose explanation while
+# still preventing runaway cost. Change summaries pass their own max_tokens via
+# _summary_max_tokens() and are NOT subject to this cap.
+_MAX_COMPLETION_TOKENS = 400
+
+DEFAULT_TIMEOUT = int(os.getenv('LLM_TIMEOUT', 60))
+DEFAULT_RETRIES = 3
+
+
+class _LoguruInterceptHandler(logging.Handler):
+    """stdlib logging handler that forwards every record to loguru.
+
+    Used for litellm's stdlib loggers so their output goes through the same
+    loguru logger as the rest of the app.
+    """
+
+    def emit(self, record):
+        # Prefer the loguru level with the same name; if loguru has no such level,
+        # fall back to the record's numeric level.
+        try:
+            resolved_level = logger.level(record.levelname).name
+        except (ValueError, AttributeError):
+            resolved_level = record.levelno
+        forwarder = logger.opt(exception=record.exc_info)
+        forwarder.log(resolved_level, record.getMessage())
+
+
+_debug_installed = False
+
+
+def _install_litellm_debug():
+    """Route litellm's stdlib loggers through loguru (runs its setup only once).
+
+    Each litellm logger has its existing handlers dropped, so any handler litellm
+    attached itself cannot double-emit. It is then set to DEBUG, given the loguru
+    intercept handler, and stopped from propagating. Setting the level to DEBUG is
+    what makes litellm produce debug records; litellm's own _turn_on_debug() is
+    deliberately not called.
+    """
+    global _debug_installed
+    if _debug_installed:
+        return
+
+    intercept = _LoguruInterceptHandler()
+    intercept.setLevel(logging.DEBUG)
+
+    litellm_logger_names = ('LiteLLM', 'litellm', 'litellm.utils', 'litellm.router')
+    for logger_name in litellm_logger_names:
+        std_logger = logging.getLogger(logger_name)
+        std_logger.handlers = []
+        std_logger.setLevel(logging.DEBUG)
+        std_logger.addHandler(intercept)
+        std_logger.propagate = False
+
+    _debug_installed = True
+    logger.info("LLM client: litellm debug logging routed through loguru")
+
+
+def completion(model: str, messages: list, api_key: str = None,
+               api_base: str = None, timeout: int = DEFAULT_TIMEOUT,
+               max_tokens: int = None, extra_body: dict = None,
+               debug: bool = False) -> tuple[str, int, int, int]:
+    """
+    Call the LLM and return (response_text, total_tokens, input_tokens, output_tokens).
+
+    Makes up to DEFAULT_RETRIES attempts in total; only litellm.Timeout and
+    litellm.APIConnectionError trigger another attempt. Any other exception is
+    logged and re-raised immediately — callers handle it gracefully.
+
+    Args:
+        model: litellm model string.
+        messages: chat messages passed straight to litellm.completion.
+        api_key / api_base / extra_body: forwarded to litellm only when truthy.
+        timeout: per-call timeout in seconds; None falls back to DEFAULT_TIMEOUT.
+        max_tokens: output cap; None falls back to _MAX_COMPLETION_TOKENS.
+        debug: when True, litellm's debug logging is routed through loguru.
+
+    Returns:
+        (text, total_tokens, input_tokens, output_tokens). Token counts are 0 if the
+        provider doesn't return usage data. If the provider reports prompt/completion
+        counts but no total, the total is their sum.
+
+    Raises:
+        RuntimeError: litellm is not installed.
+    """
+    try:
+        import litellm
+    except ImportError:
+        raise RuntimeError("litellm is not installed. Add it to requirements.txt.")
+
+    if debug:
+        _install_litellm_debug()
+
+    _timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
+
+    kwargs = {
+        'model': model,
+        'messages': messages,
+        'timeout': _timeout,
+        'temperature': 0,
+        'max_tokens': max_tokens if max_tokens is not None else _MAX_COMPLETION_TOKENS,
+    }
+    if api_key:
+        kwargs['api_key'] = api_key
+    if api_base:
+        kwargs['api_base'] = api_base
+    if extra_body:
+        kwargs['extra_body'] = extra_body
+
+    _retryable = (litellm.Timeout, litellm.APIConnectionError)
+
+    logger.debug(
+        f"LLM client: calling model={model!r} api_base={api_base!r} "
+        f"timeout={_timeout}s max_tokens={kwargs['max_tokens']}"
+    )
+    logger.trace(messages)
+
+    for attempt in range(1, DEFAULT_RETRIES + 1):
+        try:
+            response = litellm.completion(**kwargs)
+            choice   = response.choices[0]
+            message  = choice.message
+            finish   = getattr(choice, 'finish_reason', None)
+
+            text = message.content or ''
+
+            if not text:
+                # Some providers (e.g. Gemini) put text in message.parts instead of .content
+                parts = getattr(message, 'parts', None)
+                if parts:
+                    text = ''.join(getattr(p, 'text', '') or '' for p in parts).strip()
+                    logger.debug(f"LLM client: extracted text from message.parts ({len(parts)} parts) model={model!r}")
+
+            if finish == 'length':
+                logger.warning(
+                    f"LLM client: response truncated (finish_reason='length') model={model!r} "
+                    f"— increase max_tokens; got {len(text)} chars so far"
+                )
+
+            if not text:
+                logger.warning(
+                    f"LLM client: empty content from model={model!r} "
+                    f"finish_reason={finish!r} "
+                    f"message={message!r}"
+                )
+
+            usage = getattr(response, 'usage', None)
+            input_tokens  = int(getattr(usage, 'prompt_tokens',     0) or 0) if usage else 0
+            output_tokens = int(getattr(usage, 'completion_tokens', 0) or 0) if usage else 0
+            reported_total = int(getattr(usage, 'total_tokens',     0) or 0) if usage else 0
+            # Fall back to in+out whenever the provider omitted (or zeroed) the total,
+            # so usage is still counted when only the split counts are reported.
+            total_tokens  = reported_total or (input_tokens + output_tokens)
+            logger.debug(
+                f"LLM client: model={model!r} finish={finish!r} "
+                f"tokens={total_tokens} (in={input_tokens} out={output_tokens}) "
+                f"text_len={len(text)}"
+            )
+            return text, total_tokens, input_tokens, output_tokens
+
+        except _retryable as e:
+            # litellm formats its Timeout message with None when the provider doesn't
+            # propagate the timeout value — patch the exception args in-place so every
+            # caller that logs str(e) sees the real number.
+            _fix = f'after {_timeout} seconds'
+            try:
+                e.args = tuple(str(a).replace('after None seconds', _fix) for a in e.args)
+            except Exception:
+                # Best-effort cosmetic patch only; never mask the original error.
+                pass
+            if attempt < DEFAULT_RETRIES:
+                logger.warning(
+                    f"LLM call timed out/connection error (attempt {attempt}/{DEFAULT_RETRIES}), "
+                    f"retrying — model={model!r} timeout={_timeout}s error={e}"
+                )
+                continue
+            logger.warning(
+                f"LLM call failed after {DEFAULT_RETRIES} attempts ({_timeout}s timeout) "
+                f"model={model!r} error={e}"
+            )
+            raise
+
+        except Exception as e:
+            logger.warning(f"LLM call failed: model={model!r} error={e}")
+            raise
@@ -0,0 +1,804 @@
+"""
+LLM evaluation orchestration.
+
+Two public entry points:
+  - run_setup(watch, datastore)        — one-time: decide if pre-filter needed
+  - evaluate_change(watch, datastore, diff, current_snapshot) — per-change evaluation
+
+Intent resolution: watch.llm_intent → first tag with llm_intent → None (no evaluation)
+Cache: each (intent, diff) pair is evaluated exactly once, result stored in watch.
+
+Environment variable overrides (take priority over datastore settings):
+  LLM_MODEL    — model string (e.g. "gpt-4o-mini", "ollama/llama3.2")
+  LLM_API_KEY  — API key for cloud providers
+  LLM_API_BASE — base URL for local/custom endpoints (e.g. http://localhost:11434)
+"""
+
+import hashlib
+import os
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from loguru import logger
+
+from changedetectionio.strtobool import strtobool
+
+from . import client as llm_client
+from .prompt_builder import (
+    build_change_summary_prompt, build_change_summary_system_prompt,
+    build_eval_prompt, build_eval_system_prompt,
+    build_preview_prompt, build_preview_system_prompt,
+    build_setup_prompt, build_setup_system_prompt,
+)
+from .response_parser import parse_eval_response, parse_preview_response, parse_setup_response
+
+from changedetectionio.model.LLMSettings import (
+    LLMSettings,
+    LLM_DEFAULT_MAX_INPUT_CHARS as _DEFAULT_MAX_INPUT_CHARS,
+    LLM_DEFAULT_MAX_SUMMARY_TOKENS,
+    LLM_DEFAULT_THINKING_BUDGET,
+)
+
+
+def is_llm_features_disabled() -> bool:
+    """Report whether the LLM_FEATURES_DISABLED env var holds a truthy value.
+
+    An unset variable is read as the empty string before being handed to strtobool.
+    """
+    raw_flag = os.getenv('LLM_FEATURES_DISABLED', '')
+    return bool(strtobool(raw_flag))
+
+
+def get_llm_settings(datastore) -> LLMSettings:
+    """Build a validated LLMSettings from the dict stored at settings.application.llm.
+
+    A missing or empty dict is validated as `{}`, so callers always get a model
+    instance and never need a None-check. Storage itself stays a plain dict; this
+    is only the typed view used for reads.
+    """
+    settings_root = datastore.data.get('settings', {})
+    application = settings_root.get('application', {})
+    stored_llm = application.get('llm') or {}
+    return LLMSettings.model_validate(stored_llm)
+
+
+def _get_max_input_chars(datastore) -> int:
+    """Resolve the maximum number of input characters sent to the LLM.
+
+    Order of precedence: a positive integer in the LLM_MAX_INPUT_CHARS env var,
+    then a positive `max_input_chars` from the stored settings, then the module
+    default. Non-positive values are never returned from the first two sources —
+    unlimited is not permitted.
+    """
+    env_override = os.getenv('LLM_MAX_INPUT_CHARS', '').strip()
+    if env_override.isdigit():
+        parsed = int(env_override)
+        if parsed > 0:
+            return parsed
+
+    configured = get_llm_settings(datastore).max_input_chars
+    if configured and configured > 0:
+        return configured
+
+    return _DEFAULT_MAX_INPUT_CHARS
+
+
+class LLMInputTooLargeError(Exception):
+    """Raised by _check_input_size when text exceeds the allowed character count."""
+
+
+def _check_input_size(text: str, max_chars: int) -> None:
+    """Guard: raise LLMInputTooLargeError when `text` is longer than `max_chars`.
+
+    Returns None when the text fits (length equal to the limit is allowed).
+    """
+    if len(text) <= max_chars:
+        return
+    raise LLMInputTooLargeError(
+        f"Change too large for AI summary ({len(text):,} chars, limit {max_chars:,})"
+    )
+
+
+def _thinking_extra_body(model: str, budget: int) -> dict | None:
+    """Build the litellm `extra_body` that sets a thinking budget, or None.
+
+    The `thinkingConfig.thinkingBudget` payload is Gemini-specific, so any model
+    without the `gemini/` prefix gets None. For Gemini models the decision is
+    delegated to litellm's model registry (`supports_reasoning`), which avoids
+    hardcoding model names here. If the registry lookup fails for any reason
+    (unknown model, litellm unavailable) None is returned and thinking stays at
+    the provider default.
+    """
+    if not model.startswith('gemini/'):
+        return None
+
+    try:
+        import litellm
+        model_can_reason = litellm.get_model_info(model).get('supports_reasoning')
+    except Exception:
+        # Unknown model or registry lookup failed — skip rather than guess.
+        return None
+
+    if not model_can_reason:
+        return None
+
+    thinking_config = {'thinkingBudget': budget}
+    return {'generationConfig': {'thinkingConfig': thinking_config}}
+
+
+def _cached_system(text: str, model: str = '') -> dict:
+    """Build the system message for `text`, with prompt caching for Anthropic only.
+
+    Models whose name starts with `claude` or `anthropic/` get the content wrapped
+    in a text part carrying an ephemeral `cache_control` marker. Every other model
+    gets a plain string content, because other providers have their own caching
+    APIs that break when they receive cache_control.
+    """
+    if not model.startswith(('claude', 'anthropic/')):
+        return {'role': 'system', 'content': text}
+
+    cached_part = {'type': 'text', 'text': text, 'cache_control': {'type': 'ephemeral'}}
+    return {'role': 'system', 'content': [cached_part]}
+
+
+# Output-token cap for the JSON-returning calls (intent eval, preview, setup/prefilter).
+# Mirrors client.py's _MAX_COMPLETION_TOKENS so the multiplier helper has a base value
+# to scale; cloud-LLM users hit this default unmodified, preserving prior cost defaults.
+JSON_RESPONSE_MAX_TOKENS = 400
+
+# Default prompt used when the user hasn't configured llm_change_summary.
+# This owns the OUTPUT FORMAT (structure, sections, style, language). The system prompt
+# in prompt_builder.build_change_summary_system_prompt() only covers how to READ the diff.
+# Users can replace this entirely (e.g. "Just tell me the new timestamp.") without
+# fighting hard-coded structure rules from the system prompt.
+DEFAULT_CHANGE_SUMMARY_PROMPT = (
+    "Describe what changed in plain English using these sections, in this fixed order — "
+    "omit a section entirely if there is nothing to report for it:\n"
+    "  Added: ...\n"
+    "  Changed: ...\n"
+    "  Removed: ...\n"
+    "The Removed section MUST always be last. Never place removals before additions or changes.\n\n"
+    "List items as bullet points with key details for each one. Be considerate of the style "
+    "of content you are summarising and adjust your report accordingly.\n"
+    "Do not list standalone timestamps like '3 hours ago', 'Yesterday', '2 minutes ago' as added "
+    "or removed items — they are not meaningful content changes.\n"
+    "For content-heavy pages (news, listings, feeds): quote or paraphrase the specific new "
+    "headlines, items, or entries that were added — do not collapse them into vague phrases "
+    "like 'new articles were added' or 'section was expanded'.\n"
+    "For large blocks of new text (full articles, documents, long paragraphs): briefly summarise "
+    "the substance in 1-2 sentences capturing the key point — do not just repeat the title.\n\n"
+    "Do not quote non-English text verbatim; translate and summarise all content into English. "
+    "Your entire response must be in English."
+)
+
+
+def _summary_max_tokens(diff: str, max_cap: int = LLM_DEFAULT_MAX_SUMMARY_TOKENS) -> int:
+    """Pick a completion-token cap proportional to the diff size.
+
+    Roughly one token per four characters of diff, limited to `max_cap`, and never
+    below 400 (the floor wins even when `max_cap` is smaller than 400).
+    """
+    estimated = len(diff) // 4
+    capped = min(estimated, max_cap)
+    return max(400, capped)
+
+
+def apply_local_token_multiplier(base_max_tokens: int, llm_cfg: dict) -> int:
+    """
+    Scale `base_max_tokens` for endpoints that commonly serve reasoning models.
+
+    Only applies when `llm_cfg['provider_kind']` is `'ollama'` or
+    `'openai_compatible'`; for any other provider kind (or a missing/empty config)
+    the base value is returned untouched, so cloud-provider caps and costs stay
+    as they were.
+
+    Why: reasoning models emit chain-of-thought before the final answer. Without
+    extra headroom the response can be cut off mid-thought and the answer never
+    arrives, which callers would see as an empty string.
+
+    The multiplier is read from `llm_cfg['local_token_multiplier']`. It defaults
+    to 5 when missing, falsy, or not convertible to int, and is clamped to 1-20
+    as defense-in-depth against corrupted stored values that bypassed form
+    validation.
+    """
+    cfg = llm_cfg or {}
+    if cfg.get('provider_kind') not in ('ollama', 'openai_compatible'):
+        return base_max_tokens
+
+    raw_multiplier = cfg.get('local_token_multiplier') or 5
+    try:
+        factor = int(raw_multiplier)
+    except (TypeError, ValueError):
+        factor = 5
+
+    # Same 1-20 range the settings form enforces.
+    factor = min(max(factor, 1), 20)
+    return base_max_tokens * factor
+
+
+# ---------------------------------------------------------------------------
+# Intent resolution
+# ---------------------------------------------------------------------------
+
+def resolve_llm_field(watch, datastore, field: str) -> tuple[str, str]:
+    """
+    Resolve an LLM per-watch field by cascading from the watch to its tags.
+
+    The watch's own (stripped) value wins and is reported with source 'watch'.
+    Otherwise the watch's tags are checked in order and the first tag with a
+    non-empty value supplies it, with the tag's title (or 'tag' if untitled) as
+    the source. Returns ('', '') when nothing is set anywhere.
+    """
+    own_value = (watch.get(field) or '').strip()
+    if own_value:
+        return own_value, 'watch'
+
+    for tag_uuid in watch.get('tags', []):
+        known_tags = datastore.data['settings']['application'].get('tags', {})
+        tag = known_tags.get(tag_uuid)
+        if not tag:
+            continue
+        inherited = (tag.get(field) or '').strip()
+        if inherited:
+            return inherited, tag.get('title', 'tag')
+
+    return '', ''
+
+
+def resolve_intent(watch, datastore) -> tuple[str, str]:
+    """
+    Return (intent, source) where source is 'watch' or tag title.
+    Returns ('', '') if no intent is configured anywhere.
+
+    Thin wrapper over resolve_llm_field() for the 'llm_intent' field, so the
+    watch → tag cascade is implemented in exactly one place.
+    """
+    return resolve_llm_field(watch, datastore, 'llm_intent')
+
+
+# ---------------------------------------------------------------------------
+# LLM config helper
+# ---------------------------------------------------------------------------
+
+def get_llm_config(datastore) -> dict | None:
+    """
+    Resolve the active LLM config dict, or None when nothing is configured.
+
+    Returns None outright when LLM features are disabled via the environment.
+    Otherwise the first source with a non-empty model wins:
+      1. Environment variables: LLM_MODEL (plus LLM_API_KEY, LLM_API_BASE)
+      2. Datastore settings (set via UI), returned as the stored dict itself
+    """
+    if is_llm_features_disabled():
+        return None
+
+    # 1. Environment variable override
+    model_from_env = os.getenv('LLM_MODEL', '').strip()
+    if model_from_env:
+        api_key = os.getenv('LLM_API_KEY', '').strip()
+        api_base = os.getenv('LLM_API_BASE', '').strip()
+        return {'model': model_from_env, 'api_key': api_key, 'api_base': api_base}
+
+    # 2. Datastore settings
+    stored = datastore.data['settings']['application'].get('llm') or {}
+    return stored if stored.get('model') else None
+
+
+def llm_configured_via_env() -> bool:
+    """Whether the LLM model is supplied by the LLM_MODEL env var rather than the UI.
+
+    Always False while LLM features are disabled via the environment.
+    """
+    if is_llm_features_disabled():
+        return False
+    env_model = os.getenv('LLM_MODEL', '').strip()
+    return bool(env_model)
+
+
+def _runtime_llm_config(datastore) -> dict | None:
+    """
+    Runtime gate for LLM entry points: the resolved config, or None.
+
+    The config from get_llm_config() is returned only while the stored `enabled`
+    setting is on. With the toggle off, None is returned — and if a config does
+    exist a debug line is logged — so callers take their usual "not configured"
+    early-return path.
+
+    Code that must still see the saved provider while the toggle is off (e.g. a
+    "configured" badge) should call get_llm_config() directly instead.
+    """
+    cfg = get_llm_config(datastore)
+    if get_llm_settings(datastore).enabled:
+        return cfg
+
+    if cfg:
+        logger.debug("LLM features disabled via settings (enabled=False) — skipping LLM lookup")
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Global monthly token budget
+# ---------------------------------------------------------------------------
+
+def _get_month_key() -> str:
+    """Key identifying the current UTC calendar month, formatted 'YYYY-MM'."""
+    now_utc = datetime.now(timezone.utc)
+    return now_utc.strftime("%Y-%m")
+
+
+def get_global_token_budget_month(datastore=None) -> int:
+    """
+    Resolve the monthly token budget ceiling; 0 means "no limit".
+
+    Sources, in priority order:
+      1. LLM_TOKEN_BUDGET_MONTH env var, when it parses to a positive integer
+      2. `token_budget_month` in the datastore's LLM settings (never negative)
+    Unparseable values in either source are ignored rather than raised.
+    """
+    try:
+        budget_from_env = int(os.getenv('LLM_TOKEN_BUDGET_MONTH', '0'))
+    except (ValueError, TypeError):
+        budget_from_env = 0
+    if budget_from_env > 0:
+        return budget_from_env
+
+    if datastore is None:
+        return 0
+
+    try:
+        llm_section = datastore.data['settings']['application'].get('llm') or {}
+        return max(0, int(llm_section.get('token_budget_month') or 0))
+    except (ValueError, TypeError):
+        return 0
+
+
+def _estimate_cost_usd(model: str, input_tokens: int, output_tokens: int) -> float:
+    """
+    Best-effort USD cost estimate for one call, via litellm's cost calculator.
+
+    Returns 0.0 when there is no model, when both token counts are zero, or when
+    litellm cannot price the call (any exception is swallowed on purpose — cost
+    estimation must never break the caller).
+    """
+    nothing_to_price = not model or (not input_tokens and not output_tokens)
+    if nothing_to_price:
+        return 0.0
+
+    try:
+        from litellm.cost_calculator import cost_per_token
+        costs = cost_per_token(
+            model=model,
+            prompt_tokens=input_tokens,
+            completion_tokens=output_tokens,
+        )
+        prompt_cost, completion_cost = costs
+        return float(prompt_cost + completion_cost)
+    except Exception:
+        return 0.0
+
+
+def accumulate_global_tokens(datastore, tokens: int,
+                              input_tokens: int = 0, output_tokens: int = 0,
+                              model: str = '') -> None:
+    """
+    Record *tokens* against both the all-time and the current-month global counters.
+
+    The estimated USD cost (derived from model / input_tokens / output_tokens) is
+    accumulated next to the token counts. When the stored month key differs from
+    the current UTC month, the monthly token and cost counters restart from zero
+    before the new usage is added. Calls with tokens <= 0 are ignored.
+
+    The counters live at datastore.data['settings']['application']['llm']; the
+    whole settings dict is rewritten from the validated model and then committed.
+    """
+    if tokens <= 0:
+        return
+
+    month_now = _get_month_key()
+    call_cost = _estimate_cost_usd(model, input_tokens, output_tokens)
+    llm_settings = get_llm_settings(datastore)
+
+    # Month rollover: reset monthly counters
+    is_new_month = llm_settings.tokens_month_key != month_now
+    if is_new_month:
+        llm_settings.tokens_this_month = 0
+        llm_settings.cost_usd_this_month = 0.0
+        llm_settings.tokens_month_key = month_now
+
+    # All-time totals
+    llm_settings.tokens_total_cumulative += tokens
+    llm_settings.cost_usd_total_cumulative += call_cost
+    # Current-month totals
+    llm_settings.tokens_this_month += tokens
+    llm_settings.cost_usd_this_month += call_cost
+
+    # Persist as a plain dict (model_dump) so storage never holds the model object.
+    application = datastore.data['settings']['application']
+    application['llm'] = llm_settings.model_dump()
+    datastore.commit()
+
+
+def is_global_token_budget_exceeded(datastore) -> bool:
+    """
+    Tell whether this month's global token usage has hit the configured cap.
+
+    Returns False when no monthly budget is configured (LLM_TOKEN_BUDGET_MONTH)
+    or when the stored counters belong to a different month. Otherwise returns
+    True once tokens_this_month is greater than or equal to the budget.
+    """
+    monthly_budget = get_global_token_budget_month(datastore)
+    if not monthly_budget:
+        return False
+
+    stored = datastore.data['settings']['application'].get('llm') or {}
+    # A stale month key means nothing has been counted yet this month
+    counters_are_current = stored.get('tokens_month_key') == _get_month_key()
+    if not counters_are_current:
+        return False
+
+    used_this_month = stored.get('tokens_this_month') or 0
+    return used_this_month >= monthly_budget
+
+
+# ---------------------------------------------------------------------------
+# Per-watch token budget, then one-time setup: derive pre-filter
+# ---------------------------------------------------------------------------
+
+def _check_token_budget(watch, cfg, tokens_this_call: int = 0) -> bool:
+    """
+    Account for tokens against the watch and enforce its per-period cap.
+
+    The period is currently one month (the same key the global counter rolls
+    over on). The config field `max_tokens_per_count_period` is deliberately
+    period-agnostic so a configurable day/week/month can be added later
+    without renaming storage.
+
+    When tokens_this_call is positive:
+      - watch['llm_tokens_this_period'] is zeroed if a new period has started
+      - the per-period counter is incremented
+      - the lifetime counter watch['llm_tokens_used_cumulative'] (a UI stat,
+        not used for the cap) is incremented as well
+
+    Returns False when a cap is configured and the current period's counter is
+    strictly greater than it; True otherwise (including when no cap is set or
+    the stored period key is stale).
+
+    Note: only evaluate_change gates on the return value; other callers invoke
+    this purely for the accounting side effect.
+    """
+    if tokens_this_call > 0:
+        period_now = _get_month_key()
+        if watch.get('llm_tokens_period_key') != period_now:
+            # First usage in a new period: restart the per-period counter
+            watch['llm_tokens_this_period'] = 0
+            watch['llm_tokens_period_key'] = period_now
+        period_before = watch.get('llm_tokens_this_period') or 0
+        watch['llm_tokens_this_period'] = period_before + tokens_this_call
+        # Lifetime total is informational only (shown in the UI)
+        lifetime_before = watch.get('llm_tokens_used_cumulative') or 0
+        watch['llm_tokens_used_cumulative'] = lifetime_before + tokens_this_call
+
+    cap = int(cfg.get('max_tokens_per_count_period') or 0)
+    if not cap:
+        return True
+
+    # The pre-flight (tokens_this_call=0) and post-call paths read the same
+    # counter; a stale period key means "no usage yet this period".
+    if watch.get('llm_tokens_period_key') != _get_month_key():
+        return True
+
+    used = watch.get('llm_tokens_this_period') or 0
+    if used <= cap:
+        return True
+
+    logger.warning(
+        f"LLM per-period token budget exceeded for {watch.get('uuid')}: "
+        f"{used} tokens > limit {cap}"
+    )
+    return False
+
+
+def run_setup(watch, datastore, snapshot_text: str) -> None:
+    """
+    Ask the LLM whether a CSS pre-filter would improve precision for this intent.
+    Stores result in watch['llm_prefilter'] (str selector or None).
+    Called once when intent is first set, and again if pre-filter returns zero matches.
+
+    Does nothing when no LLM is configured or no intent resolves for the watch.
+    Tokens used by the call are added to the per-watch counters and to the
+    global counters; the input/output split is forwarded so the global cost
+    estimate covers setup calls too.
+
+    Any exception raised inside the call/accounting/parsing sequence is logged
+    as a warning and leaves watch['llm_prefilter'] set to None.
+    """
+    cfg = _runtime_llm_config(datastore)
+    if not cfg:
+        return
+
+    intent, _ = resolve_intent(watch, datastore)
+    if not intent:
+        return
+
+    url = watch.get('url', '')
+    system_prompt = build_setup_system_prompt()
+    user_prompt = build_setup_prompt(intent, snapshot_text, url=url)
+    settings = get_llm_settings(datastore)
+
+    try:
+        _resp = llm_client.completion(
+            model=cfg['model'],
+            messages=[
+                _cached_system(system_prompt, model=cfg['model']),
+                {'role': 'user', 'content': user_prompt},
+            ],
+            api_key=cfg.get('api_key'),
+            api_base=cfg.get('api_base'),
+            max_tokens=apply_local_token_multiplier(JSON_RESPONSE_MAX_TOKENS, cfg),
+            extra_body=_thinking_extra_body(cfg['model'], settings.thinking_budget),
+            debug=settings.debug,
+        )
+        raw, tokens = _resp[0], _resp[1]
+        # The input/output split is optional in the client response. Without
+        # it accumulate_global_tokens estimates the cost of this call as 0.0.
+        input_tokens  = _resp[2] if len(_resp) > 2 else 0
+        output_tokens = _resp[3] if len(_resp) > 3 else 0
+        # Called for its accounting side effect only; the return value is not gated on here
+        _check_token_budget(watch, cfg, tokens)
+        accumulate_global_tokens(datastore, tokens,
+                                 input_tokens=input_tokens,
+                                 output_tokens=output_tokens,
+                                 model=cfg['model'])
+        result = parse_setup_response(raw)
+        watch['llm_prefilter'] = result['selector']
+        logger.debug(f"LLM setup for {watch.get('uuid')}: prefilter={result['selector']} reason={result['reason']}")
+    except Exception as e:
+        logger.warning(f"LLM setup call failed for {watch.get('uuid')}: {e}")
+        watch['llm_prefilter'] = None
+
+
+# ---------------------------------------------------------------------------
+# AI Change Summary — human-readable description of what changed
+# ---------------------------------------------------------------------------
+
+def get_effective_summary_prompt(watch, datastore) -> str:
+    """Resolve the prompt text that summarise_change will use.
+
+    Lookup order: the 'llm_change_summary' value resolved for the watch
+    (watch → tag), then the global change_summary_default setting, then the
+    hardcoded DEFAULT_CHANGE_SUMMARY_PROMPT.
+    """
+    resolved, _source = resolve_llm_field(watch, datastore, 'llm_change_summary')
+    if not resolved:
+        settings_default = get_llm_settings(datastore).change_summary_default.strip()
+        return settings_default if settings_default else DEFAULT_CHANGE_SUMMARY_PROMPT
+    return resolved
+
+
+def compute_summary_cache_key(diff_text: str, prompt: str) -> str:
+    """
+    Derive the 16-hex-character key stored alongside a summary file.
+
+    The key is the first 16 hex digits of the MD5 of the UTF-8 diff, a NUL
+    separator, and the UTF-8 prompt. Characters that cannot be encoded are
+    replaced rather than raising. The separator keeps ('ab', 'c') and
+    ('a', 'bc') from colliding.
+    """
+    diff_bytes = diff_text.encode('utf-8', errors='replace')
+    prompt_bytes = prompt.encode('utf-8', errors='replace')
+    digest = hashlib.md5(diff_bytes + b'\x00' + prompt_bytes).hexdigest()
+    return digest[:16]
+
+
+@dataclass(frozen=True)
+class DiffPrefs:
+    """
+    Immutable bundle of the user's diff display toggles.
+
+    The values are folded into the LLM summary cache key (see
+    cache_key_suffix), so flipping any toggle produces a fresh summary.
+
+    The field defaults below are the single source of truth: the query-arg
+    fallbacks in from_request_args() and a bare DiffPrefs() (as used by the
+    worker pre-cache) both rely on them describing the same state.
+    """
+    # Toggle defaults; from_request_args() fallbacks must mirror these
+    all_changes:       bool = False
+    ignore_whitespace: bool = False
+    show_removed:      bool = True
+    show_added:        bool = True
+
+    @classmethod
+    def from_request_args(cls, args) -> 'DiffPrefs':
+        """
+        Build from a Flask request.args (or any mapping with .get(key, default)).
+        A toggle is on only when its value is exactly the string '1'.
+        """
+        def is_on(arg_name, fallback):
+            return args.get(arg_name, fallback) == '1'
+
+        return cls(
+            all_changes=is_on('all_changes', '0'),
+            ignore_whitespace=is_on('ignore_whitespace', '0'),
+            show_removed=is_on('removed', '1'),
+            show_added=is_on('added', '1'),
+        )
+
+    def cache_key_suffix(self) -> str:
+        """Serialise the toggles as a NUL-prefixed 'prefs:' fragment with 0/1 values."""
+        flags = (
+            ('all', self.all_changes),
+            ('ws', self.ignore_whitespace),
+            ('rm', self.show_removed),
+            ('add', self.show_added),
+        )
+        return '\x00prefs:' + ','.join(f'{label}={int(value)}' for label, value in flags)
+
+
+def build_summary_cache_prompt(effective_prompt: str, max_summary_tokens: int,
+                                prefs: DiffPrefs = None, model: str = '') -> str:
+    """
+    Assemble the full cache-key string handed to save/get_llm_diff_summary.
+
+    When prefs is None a bare DiffPrefs() is used. Those defaults must match
+    the UI's query-arg defaults so that a summary pre-cached by the worker is
+    found by an unmodified UI request. The worker pre-cache write and the UI
+    diff route read must both go through this helper; otherwise the prompt
+    hashes diverge and the cache file is not found.
+
+    The key is built from the effective prompt, the display prefs, the change
+    summary system prompt, the max-token setting and the active model name.
+    Folding the model name in means switching models (e.g. qwen3 → gpt-4o)
+    invalidates summaries generated by a different model.
+    """
+    active_prefs = DiffPrefs() if prefs is None else prefs
+    key_parts = [
+        effective_prompt,
+        active_prefs.cache_key_suffix(),
+        f'\x00sys:{build_change_summary_system_prompt()}',
+        f'\x00max_tokens:{max_summary_tokens}',
+        f'\x00model:{model}',
+    ]
+    return ''.join(key_parts)
+
+
+def summarise_change(watch, datastore, diff: str, current_snapshot: str = '') -> str:
+    """
+    Generate a plain-language summary of the change using the watch's
+    llm_change_summary prompt (cascades from tag if not set on watch).
+
+    Returns the summary string. Returns '' without calling the LLM when no
+    LLM is configured, when the global monthly token budget is exhausted, or
+    when the diff is empty/blank.
+
+    Errors raised by the LLM call (or by the accounting that follows it) are
+    NOT swallowed here — they propagate to the caller.
+
+    The result replaces {{ diff }} in notifications so the user gets a
+    readable description instead of raw +/- diff lines.
+    """
+    cfg = _runtime_llm_config(datastore)
+    if not cfg:
+        return ''
+
+    if is_global_token_budget_exceeded(datastore):
+        budget = get_global_token_budget_month(datastore)
+        llm_cfg = datastore.data['settings']['application'].get('llm') or {}
+        used = llm_cfg.get('tokens_this_month', 0)
+        logger.warning(
+            f"LLM summarise_change skipped: monthly budget {budget:,} reached "
+            f"({used:,} used this month)"
+        )
+        return ''
+
+    custom_prompt = get_effective_summary_prompt(watch, datastore)
+    # Guard against None as well as blank text (same check evaluate_change uses)
+    if not diff or not diff.strip():
+        return ''
+
+    _check_input_size(diff, _get_max_input_chars(datastore))
+    url = watch.get('url', '')
+    title = watch.get('page_title') or watch.get('title') or ''
+
+    system_prompt = build_change_summary_system_prompt()
+    user_prompt = build_change_summary_prompt(
+        diff=diff,
+        custom_prompt=custom_prompt,
+        current_snapshot=current_snapshot,
+        url=url,
+        title=title,
+    )
+
+    settings = get_llm_settings(datastore)
+    _extra_body = _thinking_extra_body(cfg['model'], settings.thinking_budget)
+
+    _resp = llm_client.completion(
+        model=cfg['model'],
+        messages=[
+            _cached_system(system_prompt, model=cfg['model']),
+            {'role': 'user', 'content': user_prompt},
+        ],
+        api_key=cfg.get('api_key'),
+        api_base=cfg.get('api_base'),
+        max_tokens=apply_local_token_multiplier(
+            _summary_max_tokens(diff, max_cap=settings.max_summary_tokens),
+            cfg,
+        ),
+        extra_body=_extra_body,
+        debug=settings.debug,
+    )
+    raw, tokens = _resp[0], _resp[1]
+    # The input/output split is optional in the client response
+    input_tokens  = _resp[2] if len(_resp) > 2 else 0
+    output_tokens = _resp[3] if len(_resp) > 3 else 0
+    summary = raw.strip()
+
+    # _check_token_budget already adds `tokens` to both the per-period counter
+    # and watch['llm_tokens_used_cumulative']; adding to the lifetime counter
+    # again here would double-count every summary call.
+    _check_token_budget(watch, cfg, tokens)
+    watch['llm_last_tokens_used'] = tokens
+    accumulate_global_tokens(datastore, tokens,
+                             input_tokens=input_tokens,
+                             output_tokens=output_tokens,
+                             model=cfg['model'])
+    logger.debug(
+        f"LLM change summary {watch.get('uuid')}: tokens={tokens} "
+        f"summary={summary[:80]}"
+    )
+    return summary
+
+
+# ---------------------------------------------------------------------------
+# Live-preview extraction (current content, no diff)
+# ---------------------------------------------------------------------------
+
+def preview_extract(watch, datastore, content: str) -> dict | None:
+    """
+    For the live-preview endpoint: extract relevant information from the
+    *current* page content according to the watch's intent.
+
+    Unlike evaluate_change (which compares a diff), this asks the LLM to
+    directly answer the intent against the current snapshot — giving the user
+    immediate feedback like "30 articles listed" or "Price: $149, 25% off".
+
+    Returns {'found': bool, 'answer': str}. Returns None when no LLM is
+    configured, no intent resolves, the content is blank, or the LLM
+    call/parsing fails (the failure is logged as a warning).
+
+    Token usage is added to the global counters only; the per-watch counters
+    are not touched by previews. The input/output split is forwarded so the
+    global cost estimate covers preview calls too.
+    """
+    cfg = _runtime_llm_config(datastore)
+    if not cfg:
+        return None
+
+    intent, _ = resolve_intent(watch, datastore)
+    if not intent or not content.strip():
+        return None
+
+    _check_input_size(content, _get_max_input_chars(datastore))
+    url = watch.get('url', '')
+    title = watch.get('page_title') or watch.get('title') or ''
+
+    system_prompt = build_preview_system_prompt()
+    user_prompt = build_preview_prompt(intent, content, url=url, title=title)
+    settings = get_llm_settings(datastore)
+
+    try:
+        _resp = llm_client.completion(
+            model=cfg['model'],
+            messages=[
+                _cached_system(system_prompt, model=cfg['model']),
+                {'role': 'user', 'content': user_prompt},
+            ],
+            api_key=cfg.get('api_key'),
+            api_base=cfg.get('api_base'),
+            max_tokens=apply_local_token_multiplier(JSON_RESPONSE_MAX_TOKENS, cfg),
+            extra_body=_thinking_extra_body(cfg['model'], settings.thinking_budget),
+            debug=settings.debug,
+        )
+        raw, tokens = _resp[0], _resp[1]
+        # The input/output split is optional in the client response. Without
+        # it accumulate_global_tokens estimates the cost of this call as 0.0.
+        input_tokens  = _resp[2] if len(_resp) > 2 else 0
+        output_tokens = _resp[3] if len(_resp) > 3 else 0
+        accumulate_global_tokens(datastore, tokens,
+                                 input_tokens=input_tokens,
+                                 output_tokens=output_tokens,
+                                 model=cfg['model'])
+        result = parse_preview_response(raw)
+        logger.debug(
+            f"LLM preview {watch.get('uuid')}: found={result['found']} "
+            f"tokens={tokens} answer={result['answer'][:80]}"
+        )
+        return result
+    except Exception as e:
+        logger.warning(f"LLM preview extraction failed for {watch.get('uuid')}: {e}")
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Per-change evaluation
+# ---------------------------------------------------------------------------
+
+def evaluate_change(watch, datastore, diff: str, current_snapshot: str = '') -> dict | None:
+    """
+    Evaluate whether `diff` matches the watch's intent.
+    Returns {'important': bool, 'summary': str} or None if LLM not configured / no intent.
+
+    An empty or blank diff short-circuits to important=False without an LLM call.
+
+    Results are cached in watch['llm_evaluation_cache'] under a SHA-256 of
+    (intent, diff), so a diff that was already evaluated for the same intent
+    is answered from the cache without another LLM call.
+
+    Fail-open behaviour: when the global monthly budget is reached, the
+    per-watch per-period budget is already exceeded, or the LLM call/parsing
+    raises, the change is passed through as {'important': True, 'summary': ''}
+    so notifications are not suppressed. Those fail-open results are not cached.
+    """
+    cfg = _runtime_llm_config(datastore)
+    if not cfg:
+        return None
+
+    intent, source = resolve_intent(watch, datastore)
+    if not intent:
+        return None
+
+    if not diff or not diff.strip():
+        return {'important': False, 'summary': ''}
+
+    _check_input_size(diff, _get_max_input_chars(datastore))
+
+    # Cache lookup — a stored result for this (intent, diff) pair is returned as-is
+    cache_key = hashlib.sha256(f"{intent}||{diff}".encode()).hexdigest()
+    cache = watch.get('llm_evaluation_cache') or {}
+    if cache_key in cache:
+        logger.debug(f"LLM cache hit for {watch.get('uuid')} key={cache_key[:8]}")
+        return cache[cache_key]
+
+    # Check global monthly budget before making the call
+    if is_global_token_budget_exceeded(datastore):
+        budget = get_global_token_budget_month(datastore)
+        llm_cfg = datastore.data['settings']['application'].get('llm') or {}
+        used = llm_cfg.get('tokens_this_month', 0)
+        logger.warning(
+            f"LLM evaluate_change skipped for {watch.get('uuid')}: monthly budget {budget:,} reached "
+            f"({used:,} used this month) — passing change through as important"
+        )
+        # Fail open: don't suppress notifications when budget is exhausted
+        return {'important': True, 'summary': ''}
+
+    # Pre-flight check of the per-watch per-period budget (tokens_this_call=0,
+    # so nothing is accumulated here)
+    if not _check_token_budget(watch, cfg):
+        # Already over budget — fail open (don't suppress notification)
+        return {'important': True, 'summary': ''}
+
+    url = watch.get('url', '')
+    title = watch.get('page_title') or watch.get('title') or ''
+
+    system_prompt = build_eval_system_prompt()
+    user_prompt = build_eval_prompt(
+        intent=intent,
+        diff=diff,
+        current_snapshot=current_snapshot,
+        url=url,
+        title=title,
+    )
+
+    settings = get_llm_settings(datastore)
+    try:
+        _resp = llm_client.completion(
+            model=cfg['model'],
+            messages=[
+                _cached_system(system_prompt, model=cfg['model']),
+                {'role': 'user', 'content': user_prompt},
+            ],
+            api_key=cfg.get('api_key'),
+            api_base=cfg.get('api_base'),
+            max_tokens=apply_local_token_multiplier(JSON_RESPONSE_MAX_TOKENS, cfg),
+            extra_body=_thinking_extra_body(cfg['model'], settings.thinking_budget),
+            debug=settings.debug,
+        )
+        raw, tokens = _resp[0], _resp[1]
+        # The input/output split is optional in the client response
+        input_tokens  = _resp[2] if len(_resp) > 2 else 0
+        output_tokens = _resp[3] if len(_resp) > 3 else 0
+        result = parse_eval_response(raw)
+    except Exception as e:
+        logger.warning(f"LLM evaluation failed for {watch.get('uuid')}: {e}")
+        # On failure: don't suppress the notification — pass through as important
+        watch['llm_last_tokens_used'] = 0
+        return {'important': True, 'summary': ''}
+
+    # Accumulate token usage: per-watch counters (return value ignored here)
+    # and the global monthly counters
+    _check_token_budget(watch, cfg, tokens)
+    watch['llm_last_tokens_used'] = tokens
+    accumulate_global_tokens(datastore, tokens,
+                             input_tokens=input_tokens,
+                             output_tokens=output_tokens,
+                             model=cfg['model'])
+
+    # Store in cache
+    if 'llm_evaluation_cache' not in watch or watch['llm_evaluation_cache'] is None:
+        watch['llm_evaluation_cache'] = {}
+    watch['llm_evaluation_cache'][cache_key] = result
+
+    logger.debug(
+        f"LLM eval {watch.get('uuid')} (intent from {source}): "
+        f"important={result['important']} tokens={tokens} summary={result['summary'][:80]}"
+    )
+    return result
@@ -0,0 +1,216 @@
+"""
+Prompt construction for LLM evaluation calls.
+Pure functions — no side effects, fully testable.
+"""
+
+import re
+
+from .bm25_trim import trim_to_relevant
+
+# Matches text that consists solely of a relative-time phrase: digits, one
+# word, then "ago" (e.g. "5 minutes ago"); case-insensitive.
+_AGO_RE = re.compile(r'^\d+\s+\w+\s+ago$', re.IGNORECASE)
+
+SNAPSHOT_CONTEXT_CHARS = 3_000   # current page state excerpt sent alongside the diff
+
+
+def _annotate_moved_lines(diff_text: str) -> str:
+    """
+    Pre-process a unified diff so that non-substantive +/- lines are re-marked
+    with a '~' prefix instead of '+' or '-'. This prevents the LLM from
+    classifying repositioned or trivial content as new or deleted.
+
+    A +/- line is re-marked when either:
+      - its text also appears on the opposite side of the diff (moved or
+        reordered content), or
+      - its text is a standalone relative timestamp matched by _AGO_RE
+        (e.g. "5 minutes ago").
+
+    Lines are compared after stripping the leading +/- and surrounding
+    whitespace, case-insensitively, so indentation or case changes don't
+    prevent matching.
+
+    Returns diff_text unchanged when nothing was re-marked; otherwise the
+    annotated lines joined with '\\n'.
+    """
+    lines = diff_text.splitlines()
+    added_texts   = {ln[1:].strip().lower() for ln in lines if ln.startswith('+') and ln[1:].strip()}
+    removed_texts = {ln[1:].strip().lower() for ln in lines if ln.startswith('-') and ln[1:].strip()}
+    moved_texts   = added_texts & removed_texts
+
+    # No early return on an empty moved_texts: standalone "N units ago" lines
+    # must be re-marked even when the diff contains no moved content.
+    result = []
+    annotated = False
+    for line in lines:
+        if line.startswith(('+', '-')):
+            bare = line[1:].strip()
+            if bare.lower() in moved_texts or _AGO_RE.match(bare):
+                result.append(f'~{line[1:]}')  # ~ prefix = moved/reordered/trivial, skip
+                annotated = True
+                continue
+        result.append(line)
+
+    # Hand back the caller's exact text when there was nothing to annotate
+    return '\n'.join(result) if annotated else diff_text
+
+
+def build_eval_prompt(intent: str, diff: str, current_snapshot: str = '',
+                      url: str = '', title: str = '') -> str:
+    """
+    Compose the user message for a diff evaluation call.
+
+    The message holds, in order: optional URL and page-title lines, the
+    intent, an optional excerpt of the current page (trimmed to the parts
+    relevant to the intent, at most SNAPSHOT_CONTEXT_CHARS characters) and
+    finally the diff. The system prompt is built separately by
+    build_eval_system_prompt.
+    """
+    sections = [f"URL: {url}"] if url else []
+    if title:
+        sections.append(f"Page title: {title}")
+    sections.append(f"Intent: {intent}")
+
+    # Only include page context when a snapshot was given and trimming left something
+    excerpt = ''
+    if current_snapshot:
+        excerpt = trim_to_relevant(current_snapshot, intent, max_chars=SNAPSHOT_CONTEXT_CHARS)
+    if excerpt:
+        sections.append(f"\nCurrent page state (relevant excerpt):\n{excerpt}")
+
+    sections.append(f"\nWhat changed (diff):\n{diff}")
+    return '\n'.join(sections)
+
+
+def build_eval_system_prompt() -> str:
+    """
+    Return the fixed system prompt for diff evaluation calls.
+
+    The text explains the +/-/space diff markers, requires a JSON-only reply
+    of the form {"important": ..., "summary": ...} and lists the decision
+    rules the model must follow.
+    """
+    return (
+        "You are a precise, reliable website-change evaluator for a monitoring tool.\n"
+        "Your job is to read a unified diff and decide whether it matches a user's stated intent.\n"
+        "Accuracy is critical — false positives waste the user's attention; false negatives miss what they care about.\n\n"
+        "Diff format:\n"
+        "- Lines starting with '+' are newly ADDED content\n"
+        "- Lines starting with '-' are REMOVED content\n"
+        "- Lines starting with ' ' (space) are unchanged context\n\n"
+        "Respond with ONLY a JSON object — no markdown, no explanation outside it:\n"
+        '{"important": true/false, "summary": "one sentence describing the relevant change, or why it doesn\'t match"}\n\n'
+        "Rules:\n"
+        "- important=true ONLY when the diff clearly and specifically matches the intent — be strict\n"
+        "- Pay close attention to direction: an intent about price drops means removed (-) prices and added (+) lower prices\n"
+        "- The user's intent always wins. If the intent explicitly asks about timestamps, numbers, counters, "
+        "thresholds, or any specific value (e.g. 'when the timestamp is greater than 1778599592', "
+        "'when stock count > 5'), evaluate the diff against that intent — do NOT dismiss it as cosmetic.\n"
+        "- Otherwise: empty, trivial, or genuinely cosmetic diffs (heartbeat timestamps, view counters, "
+        "whitespace, navigation tweaks) default to important=false\n"
+        "- For numeric comparisons in the intent, parse the values explicitly and compare them — "
+        "do not eyeball or round\n"
+        "- If the same text appears in both removed (-) and added (+) lines the content has likely just "
+        "shifted or been reordered. Treat pure reordering as important=false unless the intent "
+        "explicitly asks about order or position.\n"
+        "- Use OR logic when the intent lists multiple triggers — any one matching is sufficient\n"
+        "- When uncertain whether a change truly matches, prefer important=false and explain why in the summary\n"
+        "- Summary must be in the same language as the intent\n"
+        "- If important=false, the summary must clearly explain what changed and why it does not match"
+    )
+
+
+def build_preview_prompt(intent: str, content: str, url: str = '', title: str = '') -> str:
+    """
+    Compose the user message for a live-preview extraction call.
+
+    Where build_eval_prompt hands the model a diff, this hands it the
+    *current* page content and the intent phrased as a question, so the user
+    gets a direct answer and can verify the intent makes sense before saving.
+    Only the first 6,000 characters of the content are included.
+    """
+    header = []
+    if url:
+        header.append(f"URL: {url}")
+    if title:
+        header.append(f"Page title: {title}")
+
+    page_excerpt = content[:6_000]
+    body = [
+        f"Intent / question: {intent}",
+        f"\nPage content:\n{page_excerpt}",
+    ]
+    return '\n'.join(header + body)
+
+
+def build_preview_system_prompt() -> str:
+    """
+    Return the fixed system prompt for live-preview extraction calls.
+
+    The text instructs the model to answer the intent from the current page
+    content and to reply with a JSON-only object of the form
+    {"found": ..., "answer": ...}.
+    """
+    return (
+        "You are a precise, detail-oriented web page content analyst for a website monitoring tool.\n"
+        "Given the user's intent or question and the current page content, extract and directly answer "
+        "what the intent is looking for. Never guess or paraphrase — report only what the page actually contains.\n\n"
+        "Respond with ONLY a JSON object — no markdown, no explanation outside it:\n"
+        '{"found": true/false, "answer": "concise direct answer or extraction"}\n\n'
+        "Rules:\n"
+        "- found=true when the page clearly contains something relevant to the intent\n"
+        "- answer must directly address the intent with specific values where possible "
+        "(e.g. for 'current price?' → '$149.99', not 'a price is shown')\n"
+        "- answer must be in the same language as the intent\n"
+        "- Keep answer brief — one or two sentences maximum\n"
+        "- If found=false, briefly state what the page contains instead"
+    )
+
+
+def build_change_summary_prompt(diff: str, custom_prompt: str,
+                                current_snapshot: str = '', url: str = '', title: str = '') -> str:
+    """
+    Compose the user message for an AI Change Summary call.
+
+    The user's own instructions (custom_prompt) are wrapped with optional
+    URL / page-title lines and the diff, which is first passed through
+    _annotate_moved_lines so moved or trivial lines carry a '~' marker.
+
+    NOTE: current_snapshot is accepted only for caller compatibility and is
+    deliberately ignored. Sending a wholesale page excerpt made the LLM report
+    unchanged page content (e.g. old release-note bullets) as "what changed" —
+    hallucinations drawn from the excerpt rather than the diff. The diff's own
+    context lines give the model enough surrounding text.
+    """
+    annotated_diff = _annotate_moved_lines(diff)
+
+    sections = [f"URL: {url}"] if url else []
+    if title:
+        sections.append(f"Page title: {title}")
+    sections.extend([
+        f"Instructions: {custom_prompt}",
+        f"\nWhat changed (diff):\n{annotated_diff}",
+    ])
+    return '\n'.join(sections)
+
+
+def build_change_summary_system_prompt() -> str:
+    """
+    Universal, format-agnostic instructions: how to READ a diff and accuracy rules.
+    All output-format choices (prose vs JSON, sections, bullets, language, length)
+    are owned by the user prompt — including the default in
+    DEFAULT_CHANGE_SUMMARY_PROMPT — so that a user replacing the user-prompt
+    (e.g. asking for raw JSON) is not overridden by hard-coded format rules here.
+
+    NOTE: build_summary_cache_prompt folds this exact text into the summary
+    cache key, so any edit to the wording invalidates previously cached
+    summaries.
+    """
+    return (
+        "You analyse a unified-diff document showing how a monitored web page changed, "
+        "and produce exactly the output the user asks for.\n\n"
+        "Rules for reading the diff:\n"
+        "- Lines starting with + are genuinely new content.\n"
+        "- Lines starting with - are genuinely removed content.\n"
+        "- Lines starting with ~ have been PRE-IDENTIFIED as moved/reordered or trivial — "
+        "the same text exists on both sides of the diff, or the line is a standalone timestamp. "
+        "Do NOT treat ~ lines as added or removed.\n\n"
+        "Accuracy: only report what the +/- lines actually contain. Never invent details, "
+        "never speculate, never add information that isn't in the diff.\n\n"
+        "Follow the user's instructions exactly — including the requested output format "
+        "(plain text, JSON, Markdown, single value, etc.), structure, language, and length. "
+        "Do not add preamble, meta-commentary, or self-introduction. Produce only the output "
+        "the user asked for — nothing before it, nothing after it."
+    )
+
+
+def build_setup_prompt(intent: str, snapshot_text: str, url: str = '') -> str:
+    """
+    Compose the user message for the one-time setup call that decides
+    whether a CSS pre-filter would improve evaluation precision.
+
+    The page snapshot is trimmed to the parts relevant to the intent
+    (at most 4,000 characters) before being embedded.
+    """
+    page_excerpt = trim_to_relevant(snapshot_text, intent, max_chars=4_000)
+
+    sections = [f"URL: {url}"] if url else []
+    sections += [
+        f"Intent: {intent}",
+        f"\nPage content excerpt:\n{page_excerpt}",
+    ]
+    return '\n'.join(sections)
+
+
+def build_setup_system_prompt() -> str:
+    """
+    Return the fixed system prompt for the one-time pre-filter setup call.
+
+    The text asks the model to decide whether a CSS pre-filter is warranted
+    and to reply with a JSON object of the form
+    {"needs_prefilter": ..., "selector": ..., "reason": ...}, restricted to
+    semantic or attribute-based selectors.
+    """
+    return (
+        "You help configure a website change monitor.\n"
+        "Given a monitoring intent and a sample of the page content, decide if a CSS pre-filter "
+        "would improve evaluation precision by scoping the content to a specific structural section.\n\n"
+        "Respond with ONLY a JSON object:\n"
+        '{"needs_prefilter": true/false, "selector": "CSS selector or null", "reason": "one sentence"}\n\n'
+        "Rules:\n"
+        "- Only recommend a pre-filter when the intent references a specific structural section "
+        "(e.g. 'footer', 'sidebar', 'nav', 'header', 'main', 'article') OR the page clearly "
+        "has high-noise sections unrelated to the intent\n"
+        "- Use ONLY semantic element selectors: footer, nav, header, main, article, aside, "
+        "or attribute-based like [id*='price'], [class*='sidebar'] — NEVER positional selectors "
+        "like div:nth-child(3) or //*[2]\n"
+        "- Default to needs_prefilter=false — most intents don't need one\n"
+        "- selector must be null when needs_prefilter=false"
+    )
@@ -0,0 +1,84 @@
+"""
+Parse and validate LLM JSON responses.
+Pure functions — no side effects, fully testable.
+
+LLMs occasionally return JSON wrapped in markdown fences or with trailing
+text. This module handles those cases gracefully.
+"""
+
+import json
+import re
+
+# Positional selectors are fragile — reject them even if the LLM generates them.
+# Matches (case-insensitively) nth-child / nth-of-type, ":eq(", a bracketed
+# number such as "[2]", and XPath of the form "//*[<digit>".
+_POSITIONAL_SELECTOR_RE = re.compile(
+    r'nth-child|nth-of-type|:eq\(|\[\d+\]|\/\/\*\[\d',
+    re.IGNORECASE
+)
+
+
+def _extract_json(raw: str) -> str:
+    """
+    Strip markdown fences and extract the first JSON object from an LLM reply.
+
+    Each '{' is tried in order as the start of a JSON value; the text of the
+    first one that decodes cleanly is returned, so trailing prose — even prose
+    that itself contains braces — does not leak into the result.
+
+    If no '{' starts a decodable value, falls back to the widest '{ ... }'
+    span (callers then get a JSONDecodeError from json.loads), or to the
+    de-fenced text when there are no braces at all.
+    """
+    raw = raw.strip()
+    # Remove ```json ... ``` or ``` ... ``` fences
+    raw = re.sub(r'^```(?:json)?\s*', '', raw, flags=re.MULTILINE)
+    raw = re.sub(r'\s*```$', '', raw, flags=re.MULTILINE)
+
+    # raw_decode parses one JSON value starting at the given index and reports
+    # where it ended, which gives the exact extent of the object.
+    decoder = json.JSONDecoder()
+    for brace in re.finditer(r'\{', raw):
+        start = brace.start()
+        try:
+            _, end = decoder.raw_decode(raw, start)
+        except json.JSONDecodeError:
+            continue
+        return raw[start:end]
+
+    # Nothing decodable: keep the previous best-effort behaviour
+    match = re.search(r'\{.*\}', raw, re.DOTALL)
+    return match.group(0) if match else raw
+
+
+def parse_eval_response(raw: str) -> dict:
+    """
+    Parse a diff evaluation response.
+    Returns {'important': bool, 'summary': str}.
+    Falls back to important=False on any parse error (invalid JSON, a reply
+    that is not a JSON object, or a non-string input).
+
+    A quoted boolean such as "false" or "no" is read by its text rather than
+    by Python truthiness, and a null summary becomes '' instead of 'None'.
+    """
+    try:
+        data = json.loads(_extract_json(raw))
+        important = data.get('important', False)
+        if isinstance(important, str):
+            # bool("false") is True — treat clearly negative strings as False
+            important = important.strip().lower() not in ('', 'false', 'no', '0', 'null', 'none')
+        summary = data.get('summary')
+        return {
+            'important': bool(important),
+            'summary': '' if summary is None else str(summary).strip(),
+        }
+    except (json.JSONDecodeError, AttributeError, TypeError):
+        return {'important': False, 'summary': ''}
+
+
+def parse_preview_response(raw: str) -> dict:
+    """
+    Parse a live-preview extraction response.
+    Returns {'found': bool, 'answer': str}.
+    Falls back to found=False on any parse error (invalid JSON, a reply that
+    is not a JSON object, or a non-string input).
+
+    A quoted boolean such as "false" or "no" is read by its text rather than
+    by Python truthiness, and a null answer becomes '' instead of 'None'.
+    """
+    try:
+        data = json.loads(_extract_json(raw))
+        found = data.get('found', False)
+        if isinstance(found, str):
+            # bool("false") is True — treat clearly negative strings as False
+            found = found.strip().lower() not in ('', 'false', 'no', '0', 'null', 'none')
+        answer = data.get('answer')
+        return {
+            'found': bool(found),
+            'answer': '' if answer is None else str(answer).strip(),
+        }
+    except (json.JSONDecodeError, AttributeError, TypeError):
+        return {'found': False, 'answer': ''}
+
+
+def parse_setup_response(raw: str) -> dict:
+    """
+    Parse a setup/pre-filter decision response.
+    Returns {'needs_prefilter': bool, 'selector': str|None, 'reason': str}.
+    Rejects positional selectors even if the LLM generates them.
+
+    Falls back to needs_prefilter=False / selector=None on any parse error.
+    A quoted boolean such as "false" is read by its text rather than by Python
+    truthiness, a selector that is not a string is discarded, and a null
+    reason becomes '' instead of 'None'.
+    """
+    try:
+        data = json.loads(_extract_json(raw))
+        needs = data.get('needs_prefilter', False)
+        if isinstance(needs, str):
+            # bool("false") is True — treat clearly negative strings as False
+            needs = needs.strip().lower() not in ('', 'false', 'no', '0', 'null', 'none')
+        needs = bool(needs)
+
+        selector = data.get('selector') or None
+        if not isinstance(selector, str):
+            # Only a string can be a CSS selector (and be regex-checked below)
+            selector = None
+
+        # Sanitise: reject positional selectors
+        if selector and _POSITIONAL_SELECTOR_RE.search(selector):
+            selector = None
+            needs = False
+
+        reason = data.get('reason')
+        return {
+            'needs_prefilter': needs,
+            'selector': selector if needs else None,
+            'reason': '' if reason is None else str(reason).strip(),
+        }
+    except (json.JSONDecodeError, AttributeError, TypeError):
+        return {'needs_prefilter': False, 'selector': None, 'reason': ''}
@@ -0,0 +1,18 @@
+"""
+Shared UI placeholder strings for LLM fields.
+
+Used by WTForms field definitions in forms.py and blueprint/tags/form.py.
+Templates use their own _()-translated variants but should stay in sync with these.
+"""
+
+# llm_intent field — placeholder text for per-watch context
+LLM_INTENT_WATCH_PLACEHOLDER = (
+    "e.g. Alert me when the price drops below $300, or a new product is launched. "
+    "Ignore footer and navigation changes."
+)
+
+# llm_intent field — placeholder text for tag/group context
+LLM_INTENT_TAG_PLACEHOLDER = (
+    "e.g. Flag price changes or new product launches across all watches in this group"
+)
+
@@ -12,6 +12,7 @@ from changedetectionio.notification import (

 # Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification
 _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
+DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'



@@ -30,6 +31,10 @@ class model(dict):
                    'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
                    'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds
                    'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")),  # Number of threads, lower is better for slow connections
+                    'default_ua': {
+                        'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
+                        'html_webdriver': None,
+                    }
                },
                'application': {
                    # Custom notification content
@@ -38,9 +43,7 @@ class model(dict):
                    'api_access_token_enabled': True,
                    'base_url' : None,
                    'empty_pages_are_a_change': False,
-                    'browser_profile': None,           # machine-name of the system-default BrowserProfile
-                    'browser_profiles': {},            # user-defined profiles keyed by machine name
-                    'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "requests"),
+                    'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
                    'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
                    'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
                    'global_subtractive_selectors': [],
@@ -67,6 +70,9 @@ class model(dict):
                    'shared_diff_access': False,
                    'strip_ignored_lines': False,
                    'tags': None,  # Initialized in __init__ with real datastore_path
+                    # All LLM settings now live nested under application.llm.* (post-migration update_31).
+                    # Defaults come from LLMSettings.model_validate({}) at read time —
+                    # no need to pre-seed an empty {} here.
                    'webdriver_delay': None , # Extra delay in seconds before extracting text
                    'ui': {
                        'use_page_title_in_list': True,
@@ -0,0 +1,65 @@
+"""
+Validation/typing layer for the LLM config dict stored at
+    datastore.data['settings']['application']['llm']
+
+Storage stays a plain dict (orjson-serialized). This model is hydrated on read
+(model_validate) and dumped on write (model_dump). WTForms field names match
+the storage field names exactly — no aliases needed.
+"""
+from typing import ClassVar, Tuple
+
+from pydantic import BaseModel, ConfigDict
+
+
+# Module-level defaults; each one seeds the LLMSettings field named in its comment.
+LLM_DEFAULT_THINKING_BUDGET = 0  # default for LLMSettings.thinking_budget
+LLM_DEFAULT_MAX_SUMMARY_TOKENS = 3000  # default for LLMSettings.max_summary_tokens
+LLM_DEFAULT_LOCAL_TOKEN_MULTIPLIER = 5  # default for LLMSettings.local_token_multiplier
+LLM_DEFAULT_MAX_INPUT_CHARS = 100_000  # default for LLMSettings.max_input_chars
+LLM_DEFAULT_BUDGET_ACTION = 'skip_llm'  # default for LLMSettings.budget_action
+
+
+class LLMSettings(BaseModel):
+    # Schema for the LLM config dict kept in storage as a plain dict; every
+    # field has a default, so an empty dict validates to the defaults below.
+    #
+    # extra='forbid' rejects any key that isn't a declared field with a
+    # ValidationError. Loud failure forces new form fields to be declared here
+    # before they can land in storage — closes the CWE-915 mass-assignment class
+    # of bugs (see GHSA-h3x5-5j56-hm2j for the canonical example).
+    model_config = ConfigDict(extra='forbid')
+
+    # --- User-editable behaviour settings ---
+    enabled: bool = True
+    debug: bool = False
+    override_diff_with_summary: bool = True
+    restock_use_fallback_extract: bool = True
+    thinking_budget: int = LLM_DEFAULT_THINKING_BUDGET
+    max_summary_tokens: int = LLM_DEFAULT_MAX_SUMMARY_TOKENS
+    budget_action: str = LLM_DEFAULT_BUDGET_ACTION
+    change_summary_default: str = ''
+    token_budget_month: int = 0
+    max_input_chars: int = LLM_DEFAULT_MAX_INPUT_CHARS
+    # Per-watch per-period token cap; read by _check_token_budget() in evaluator.py.
+    # 0 means unlimited. Once a watch's usage within the current period hits this cap,
+    # AI evaluation is skipped for it until the period rolls over. Period is currently
+    # hard-coded to month (matches the global counter rollover); name is period-agnostic
+    # to leave room for a configurable period (day/week/month) later.
+    max_tokens_per_count_period: int = 0
+
+    # --- Provider connection (the same names are listed in CONNECTION_FIELDS) ---
+    model: str = ''
+    api_key: str = ''
+    api_base: str = ''
+    provider_kind: str = ''
+    local_token_multiplier: int = LLM_DEFAULT_LOCAL_TOKEN_MULTIPLIER
+
+    # --- Usage counters (the same names are listed in PROTECTED_FIELDS) ---
+    tokens_total_cumulative: int = 0
+    tokens_this_month: int = 0
+    tokens_month_key: str = ''
+    cost_usd_total_cumulative: float = 0.0
+    cost_usd_this_month: float = 0.0
+
+    # ClassVar annotations keep the two tuples below out of the model's fields,
+    # so they are never validated or dumped as settings.
+    # Provider-connection fields wiped on /llm/clear and when the model is emptied.
+    CONNECTION_FIELDS: ClassVar[Tuple[str, ...]] = (
+        'model', 'api_key', 'api_base', 'provider_kind', 'local_token_multiplier',
+    )
+    # Runtime-managed counters — form submissions must never overwrite these.
+    PROTECTED_FIELDS: ClassVar[Tuple[str, ...]] = (
+        'tokens_total_cumulative', 'tokens_this_month', 'tokens_month_key',
+        'cost_usd_total_cumulative', 'cost_usd_this_month',
+    )
@@ -0,0 +1,239 @@
+# Pydantic Migration
+
+Plan for incrementally moving the app's storage dicts behind Pydantic models. Driven by
+security (CWE-915 mass-assignment, see [GHSA-h3x5-5j56-hm2j][advisory]) and schema
+enforcement, not just type tidying.
+
+[advisory]: https://github.com/dgtlmoon/changedetection.io/security/advisories/GHSA-h3x5-5j56-hm2j
+
+## The goal
+
+Every form/API endpoint that mutates a stored dict should validate input against a
+declared schema before writing. `extra='forbid'` rejects unknown keys — so an attacker
+POSTing extra fields like `uuid=…`, `last_checked=…`, `history=[…]` can't smuggle them
+into storage. Per-route allowlists work but rot; one declared schema per stored shape
+doesn't.
+
+## Prefer a migration over permanent complexity
+
+If you're about to add a compatibility shim, an alias, a backward-compat fallback, or a
+"handle both old and new shape" branch — stop and ask whether a one-time `update_N`
+migration solves the same problem by *renaming the stored data*. A migration runs once
+per install; the shim lives in the code forever and every future contributor has to
+understand it.
+
+Concrete example from this PR: the original design used `Field(alias='llm_X')` so
+Pydantic could accept both the legacy form-field name (`llm_model`) and the new
+storage name (`model`). That alias survived every read/write for the life of the app
+and introduced a subtle `model_dump(by_alias=True)` merge bug. The simpler answer was
+to rename the form fields to match the storage names (an in-PR rename, no migration
+needed since storage was new), drop the aliases entirely, and delete ~25 lines of
+plumbing. **Pay once with a migration; don't pay forever with complexity.**
+
+Same principle applies the moment you find yourself writing `dict.get(new_key) or
+dict.get(old_key)`. That's a migration in disguise — write the migration instead.
+
+## Architecture choice: validator at the boundary, not domain model
+
+There are two ways to use Pydantic. Pick one per slice — they are not interchangeable.
+
+**Pydantic-as-validator (what we do).** Storage stays a plain dict. A `BaseModel`
+validates input at the boundary, dumps back to a dict. No call-site changes; the
+existing `watch['x']` dict access keeps working everywhere.
+
+**Pydantic-as-domain-model.** Replace `dict` inheritance with `BaseModel`. ~190 call
+sites switch from `watch['x']` to `watch.x`. Much bigger blast radius, defers the
+security win. Not what we're doing right now.
+
+The CWE-915 fix only needs the validator pattern. Domain-model replacement is a
+separate, later project.
+
+## The template (LLMSettings)
+
+The first migrated slice. Use as the reference for the next one.
+
+**Match the WTForms field names to the storage / Pydantic field names** so the
+form-input dict and the storage dict have the same key shape. No aliases, no
+`populate_by_name=True`, no `by_alias=True` merge gymnastics. Only reach for
+`Field(alias=…)` if you genuinely cannot rename the form field (rare).
+
+`model/LLMSettings.py`:
+
+```python
+class LLMSettings(BaseModel):
+    model_config = ConfigDict(extra='forbid')
+
+    enabled: bool = True
+    model: str = ''
+    ...
+
+    # System-managed counters
+    tokens_total_cumulative: int = 0
+    ...
+
+    # Field groups
+    CONNECTION_FIELDS: ClassVar[Tuple[str, ...]] = ('model', 'api_key', ...)
+    PROTECTED_FIELDS:  ClassVar[Tuple[str, ...]] = ('tokens_total_cumulative', ...)
+```
+
+Boundary pattern at the route handler:
+
+```python
+# Read
+settings = LLMSettings.model_validate(
+    datastore.data['settings']['application'].get('llm') or {}
+)
+
+# Merge form input
+form_input = dict(form.data.get('llm') or {})
+for protected in LLMSettings.PROTECTED_FIELDS:
+    form_input.pop(protected, None)  # counters never come from form
+merged = LLMSettings.model_validate({**settings.model_dump(), **form_input})
+
+# Write — re-validates the schema on every write
+datastore.data['settings']['application']['llm'] = merged.model_dump()
+```
+
+## Unresolved architectural decisions
+
+Two decisions need answers before the `WatchInput` slice. They're not blockers for `App.py`.
+
+### OpenAPI spec vs Pydantic model — who's source of truth?
+
+Today: `docs/api-spec.yaml` declares the Watch/Tag shape; `model/schema_utils.py` reads
+it to compute readonly fields; the API layer validates against it; the model layer is a
+plain dict that doesn't know about either. When `WatchInput` lands, that's a third
+shape declaration.
+
+Two ways to live:
+- **Pydantic is source.** Generate / sync `api-spec.yaml` from the model
+  (e.g. via `model_json_schema()`). One declaration, multiple consumers. Long-term
+  right answer; needs tooling.
+- **Parallel sources with discipline.** Hand-keep them aligned. Faster to ship but
+  drift is inevitable — that's the bug class we're already trying to close.
+
+Recommendation: start parallel (keep `api-spec.yaml` for now), but write Watch's
+Pydantic model so it could be the eventual single source. Don't *invent* a new
+field shape — match the spec.
+
+### Plugin / processor_config_* extensibility
+
+`processor_config_restock_diff` (and future processor configs) are written by
+plugins, not the core. `extra='forbid'` on a Watch input model would reject them.
+
+Options:
+- **Per-processor sub-models.** Each plugin owns its `<Processor>Settings` Pydantic
+  model; Watch input validates only core fields, processor configs validate
+  separately at their own boundary (the per-watch `restock_diff.json`, etc.).
+- **Opaque pass-through.** Watch input model treats `processor_config_*` as a
+  declared dict-typed field. Loses per-key validation but preserves the
+  plugin-extensibility contract.
+
+Recommendation: per-processor sub-models. Matches the file split already done in
+`update_30` (separate `restock_diff.json` per watch).
+
+## Migration order
+
+| Target | Difficulty | Value | Status |
+|---|---|---|---|
+| `LLMSettings` | low | medium | done (this PR) |
+| `App.py` → `AppSettings` (nested) | low | medium | next |
+| `WatchInput` (form/API validator) | medium | **HIGH — closes [GHSA-h3x5-5j56-hm2j][advisory]** | next-next |
+| `TagInput` (form/API validator) | medium | medium | after Watch |
+| `watch_base(dict)` → `BaseModel` | very high | high | separate multi-PR project, much later |
+
+`Tags.py` (TagsDict), `persistence.py`, `schema_utils.py` are not data models — leave alone.
+
+### Concrete next steps
+
+1. **`App.py`.** Pure dict tree under `settings.{application,requests,headers}`. Define
+   nested `BaseModel`s; `LLMSettings` slots in as the existing sub-tree. No call-site
+   churn — just the global settings POST handler. Sets the pattern for nested models.
+
+2. **`WatchInput` BaseModel** for `blueprint/ui/edit.py:225` and `api/Watch.py`. Replace:
+   ```python
+   datastore.data['watching'][uuid].update(form.data)  # CWE-915
+   ```
+   with:
+   ```python
+   validated = WatchInput.model_validate(form.data)
+   datastore.data['watching'][uuid].update(validated.model_dump())
+   ```
+   Closes the unpatched advisory. Should be a security-tagged commit referencing the GHSA.
+
+3. **`TagInput` BaseModel** — same pattern, smaller.
+
+## Gotchas discovered
+
+These cost real debugging time in the LLMSettings PR. Worth knowing before the next slice.
+
+### `extra='forbid'` is the right default
+
+`extra='ignore'` silently drops unknowns and hides developer mistakes (add a form field,
+forget to declare it on the model, your feature appears to work until you reload). `forbid`
+fails loudly. `allow` defeats the purpose entirely — it's how injection succeeds.
+
+### Don't use Field aliases unless you actually need them
+
+The LLMSettings PR originally used `alias='llm_X'` to bridge llm_-prefixed WTForms
+names to stripped storage names. That created a documented gotcha: with
+`extra='forbid'`, having both `model` and `llm_model` in the same input dict is a
+`ValidationError`, and merging existing-storage-dump with form input required
+`by_alias=True` to keep both sides on the alias shape. We fixed it by renaming the
+form fields to match the storage field names. **Match the form to the model
+upfront and you avoid the whole class of merge bugs.**
+
+### Round-trip counters through the model, don't mutate the dict
+
+If runtime code (e.g. a token accumulator) writes to the storage dict directly, the
+schema is bypassed. Load → mutate instance attributes → `model_dump()` → write back.
+This re-validates on every write and prevents drift.
+
+### Per-call validation needs strict + tolerant modes? Don't.
+
+You might be tempted to validate form input strictly but allow extras in storage
+hydration. Don't — `extra='forbid'` everywhere means storage drift cannot go unnoticed. If
+something put unknown keys in storage, you want loud failure, not silent acceptance.
+
+### Migrations are convention-based by accident if you let them be
+
+Moving every key where `k.startswith('llm_')` is shorter than an explicit list but
+silently catches any future flat `llm_*` key. Migrations are forever — prefer an
+explicit allowlist of keys to move, even if it's verbose.
+
+## What NOT to do
+
+- Don't add custom helper methods (`dump_without_connection()`, `clear_X()`) when stock
+  `model_dump(exclude=set(FIELDS))` works. The standard idiom is more readable and
+  needs no extra code on the model.
+- Don't push security/business logic into the model (e.g. SSRF guards, credential-exfil
+  checks). The model owns field shape and validation. Route handlers own
+  policy. Mixing them dilutes both.
+- Don't make `get_X_config()` return a Pydantic instance if callers do dict-style access.
+  Either migrate all call sites (high-touch) or keep returning a dict and let the model
+  be the validation/dump layer only.
+- Don't `model_copy(update=...)` without re-validating. It doesn't coerce types or
+  enforce `extra='forbid'`. Use `model_validate({**old.model_dump(), **updates})` for
+  strict merges.
+
+## Required for each new slice
+
+Each migration PR should ship:
+
+- `model/<Thing>Settings.py` (or input model) — declared schema, `extra='forbid'`,
+  field names matching the form fields (a `Field(alias=…)` only if the form field genuinely cannot be renamed).
+- `store/updates.py:update_N` if the storage shape changes. Pure dict-shuffling, no
+  Pydantic import (migrations should not depend on the model — model evolves
+  independently).
+- `tests/unit/test_<thing>.py` — unit coverage of the model itself: defaults,
+  form-input merge, type coercion, `extra='forbid'` rejection, dump shapes.
+- All runtime callers updated to go through `get_<thing>_settings(datastore)` or
+  equivalent, not raw dict reads.
+
+## Reference
+
+- `model/LLMSettings.py` — the template
+- `tests/unit/test_llm_settings.py` — model unit-test template
+- `store/updates.py:update_31` — schema migration template
+- `blueprint/settings/__init__.py` (POST handler) — boundary-validation template
+- `llm/evaluator.py:accumulate_global_tokens` — instance-mutation-then-dump-back template
@@ -46,11 +46,26 @@ class model(EntityPersistenceMixin, watch_base):
        super(model, self).__init__(*arg, **kw)

        self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
+        self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')

        if kw.get('default'):
            self.update(kw['default'])
            del kw['default']

+    def matches_url(self, url: str) -> bool:
+        """Return True if this tag should be auto-applied to the given watch URL.
+
+        Wildcard patterns (*,?,[ ) use fnmatch; anything else is a case-insensitive
+        substring match. Returns False if no pattern is configured.
+        """
+        import fnmatch
+        pattern = self.get('url_match_pattern', '').strip()
+        if not pattern or not url:
+            return False
+        if any(c in pattern for c in ('*', '?', '[')):
+            return fnmatch.fnmatch(url.lower(), pattern.lower())
+        return pattern.lower() in url.lower()
+
    # _save_to_disk() method provided by EntityPersistenceMixin
    # commit() and _get_commit_data() methods inherited from watch_base
    # Tag uses default _get_commit_data() (includes all keys)
@@ -353,39 +353,58 @@ class model(EntityPersistenceMixin, watch_base):
    def is_source_type_url(self):
        return self.get('url', '').startswith('source:')

-    @property
-    def effective_browser_profile(self):
-        """Resolve the effective BrowserProfile for this watch.
-
-        Walks the chain: watch → tag (overrides_watch=True) → global settings → built-in fallback.
-        Never raises. Returns a BrowserProfile instance.
-        """
-        from changedetectionio.model.browser_profile import resolve_browser_profile, BUILTIN_REQUESTS
-        if not self._datastore:
-            return BUILTIN_REQUESTS
-        try:
-            return resolve_browser_profile(self, self._datastore)
-        except Exception:
-            return BUILTIN_REQUESTS
-
    @property
    def get_fetch_backend(self):
-        """Legacy property — prefer effective_browser_profile.fetch_backend for new code.
-
-        Returns the raw fetch_backend stored on this watch (or 'requests' for PDFs).
-        Does NOT walk the tag/global resolution chain.
        """
+        Get the fetch backend for this watch with special case handling.
+
+        CHAIN RESOLUTION OPPORTUNITY:
+        Currently returns watch.fetch_backend directly, but doesn't implement
+        Watch → Tag → Global resolution chain. With Pydantic:
+
+        @computed_field
+        def resolved_fetch_backend(self) -> str:
+            # Special case: PDFs always use html_requests
+            if self.is_pdf:
+                return 'html_requests'
+
+            # Watch override
+            if self.fetch_backend and self.fetch_backend != 'system':
+                return self.fetch_backend
+
+            # Tag override (first tag with overrides_watch=True wins)
+            for tag_uuid in self.tags:
+                tag = self._datastore.get_tag(tag_uuid)
+                if tag.overrides_watch and tag.fetch_backend:
+                    return tag.fetch_backend
+
+            # Global default
+            return self._datastore.settings.fetch_backend
+        """
+        # Maybe also if is_image etc?
+        # This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
        if self.is_pdf:
-            return 'requests'
+            return 'html_requests'
+
        return self.get('fetch_backend')

    @property
    def fetcher_supports_screenshots(self):
-        """Return True if the resolved fetcher for this watch supports screenshots."""
+        """Return True if the fetcher configured for this watch supports screenshots.
+
+        Resolves 'system' via self._datastore, then checks supports_screenshots on
+        the actual fetcher class. Works for built-in and plugin fetchers alike.
+        """
        from changedetectionio import content_fetchers
-        fetcher_class = content_fetchers.get_fetcher(self.effective_browser_profile.fetch_backend)
+
+        fetcher_name = self.get_fetch_backend  # already handles is_pdf → html_requests
+        if not fetcher_name or fetcher_name == 'system':
+            fetcher_name = self._datastore['settings']['application'].get('fetch_backend', 'html_requests')
+
+        fetcher_class = getattr(content_fetchers, fetcher_name, None)
        if fetcher_class is None:
            return False
+
        return bool(getattr(fetcher_class, 'supports_screenshots', False))

    @property
@@ -446,22 +465,21 @@ class model(EntityPersistenceMixin, watch_base):
                    if ',' in i:
                        k, v = i.strip().split(',', 2)

-                        # The index history could contain a relative path, so we need to make the fullpath
-                        # so that python can read it
-                        # Cross-platform: check for any path separator (works on Windows and Unix)
-                        if os.sep not in v and '/' not in v and '\\' not in v:
-                            # Relative filename only, no path separators
-                            v = os.path.join(self.data_dir, v)
-                        else:
-                            # It's possible that they moved the datadir on older versions
-                            # So the snapshot exists but is in a different path
-                            # Cross-platform: use os.path.basename instead of split('/')
-                            snapshot_fname = os.path.basename(v)
-                            proposed_new_path = os.path.join(self.data_dir, snapshot_fname)
-                            if not os.path.exists(v) and os.path.exists(proposed_new_path):
-                                v = proposed_new_path
+                        # Always resolve history entries to within the watch's own data directory.
+                        # Entries restored from backup could contain absolute or traversal paths —
+                        # never trust them. Use realpath to also block symlink-based escapes.
+                        safe_data_dir = os.path.realpath(self.data_dir)
+                        snapshot_fname = os.path.basename(v.strip())
+                        resolved_path = os.path.realpath(os.path.join(self.data_dir, snapshot_fname))

-                        tmp_history[k] = v
+                        if not resolved_path.startswith(safe_data_dir + os.sep) and resolved_path != safe_data_dir:
+                            logger.warning(f"Skipping unsafe history entry for {self.get('uuid')}: {v!r}")
+                            continue
+
+                        if not os.path.exists(resolved_path):
+                            continue
+
+                        tmp_history[k] = resolved_path

        if len(tmp_history):
            self.__newest_history_key = list(tmp_history.keys())[-1]
@@ -544,6 +562,15 @@ class model(EntityPersistenceMixin, watch_base):
        if not filepath:
            filepath = self.history[timestamp]

+        # Confine every read to the watch's own data directory — defence in depth
+        # against any path that bypasses the history parser (e.g. direct filepath= callers).
+        # Set HISTORY_SNAPSHOT_FILE_ALLOW_OUTSIDE_WATCH_DATADIR=true to disable (not recommended).
+        if self.data_dir and not strtobool(os.getenv('HISTORY_SNAPSHOT_FILE_ALLOW_OUTSIDE_WATCH_DATADIR', 'False')):
+            safe_data_dir = os.path.realpath(self.data_dir)
+            resolved = os.path.realpath(filepath)
+            if not (resolved.startswith(safe_data_dir + os.sep) or resolved == safe_data_dir):
+                raise PermissionError(f"Snapshot path {filepath!r} is outside the watch data directory")
+
        # Check if binary file (image, PDF, etc.)
        # Binary files are NEVER saved with .br compression, only text files are
        binary_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.pdf', '.bin', '.jfif')
@@ -779,24 +806,50 @@ class model(EntityPersistenceMixin, watch_base):
        # Also in the case that the file didnt exist
        return True

-    def bump_favicon(self, url, favicon_base_64: str) -> None:
+    def bump_favicon(self, url, favicon_base_64: str, mime_type: str = None) -> None:
        from urllib.parse import urlparse
        import base64
        import binascii
-        decoded = None
+        import re

-        if url:
+        MAX_FAVICON_BYTES = 1 * 1024 * 1024  # 1 MB
+
+        MIME_TO_EXT = {
+            'image/png': 'png',
+            'image/x-icon': 'ico',
+            'image/vnd.microsoft.icon': 'ico',
+            'image/jpeg': 'jpg',
+            'image/gif': 'gif',
+            'image/svg+xml': 'svg',
+            'image/webp': 'webp',
+            'image/bmp': 'bmp',
+        }
+
+        extension = None
+
+        # If the caller already resolved the MIME type (e.g. from blob.type or a data URI),
+        # use that directly — it's more reliable than guessing from a URL path.
+        if mime_type:
+            extension = MIME_TO_EXT.get(mime_type.lower().split(';')[0].strip(), None)
+
+        # Fall back to extracting extension from URL path, unless it's a data URI.
+        if not extension and url and not url.startswith('data:'):
            try:
                parsed = urlparse(url)
                filename = os.path.basename(parsed.path)
-                (base, extension) = filename.lower().strip().rsplit('.', 1)
+                (_base, ext) = filename.lower().strip().rsplit('.', 1)
+                extension = ext
            except ValueError:
-                logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
-                return None
-        else:
-            # Assume favicon.ico
-            base = "favicon"
-            extension = "ico"
+                logger.warning(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}', defaulting to ico")
+
+        # Handle data URIs: extract MIME type from the URI itself when not already known
+        if not extension and url and url.startswith('data:'):
+            m = re.match(r'^data:([^;]+);base64,', url)
+            if m:
+                extension = MIME_TO_EXT.get(m.group(1).lower(), None)
+
+        if not extension:
+            extension = 'ico'

        fname = os.path.join(self.data_dir, f"favicon.{extension}")

@@ -805,22 +858,27 @@ class model(EntityPersistenceMixin, watch_base):
            decoded = base64.b64decode(favicon_base_64, validate=True)
        except (binascii.Error, ValueError) as e:
            logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
-        else:
-            if decoded:
-                try:
-                    with open(fname, 'wb') as f:
-                        f.write(decoded)
+            return None

-                    # Invalidate module-level favicon filename cache for this watch
-                    _FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
+        if len(decoded) > MAX_FAVICON_BYTES:
+            logger.warning(f"UUID: {self.get('uuid')} Favicon too large ({len(decoded)} bytes), skipping")
+            return None

-                    # A signal that could trigger the socket server to update the browser also
-                    watch_check_update = signal('watch_favicon_bump')
-                    if watch_check_update:
-                        watch_check_update.send(watch_uuid=self.get('uuid'))
+        try:
+            with open(fname, 'wb') as f:
+                f.write(decoded)

-                except Exception as e:
-                    logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
+            # Invalidate module-level favicon filename cache for this watch
+            _FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
+
+            # A signal that could trigger the socket server to update the browser also
+            watch_check_update = signal('watch_favicon_bump')
+            if watch_check_update:
+                watch_check_update.send(watch_uuid=self.get('uuid'))
+
+        except Exception as e:
+            logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
+            return None

        # @todo - Store some checksum and only write when its different
        logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")
@@ -951,6 +1009,40 @@ class model(EntityPersistenceMixin, watch_base):
        return False


+    @staticmethod
+    def _llm_summary_prompt_hash(prompt: str) -> str:
+        """8-char hex hash of the prompt — used to detect when the prompt changes."""
+        import hashlib
+        return hashlib.md5(prompt.encode('utf-8', errors='replace')).hexdigest()[:8]
+
+    def get_llm_diff_summary(self, from_version, to_version, prompt: str = '') -> str:
+        """Return the cached AI Change Summary for this from→to + prompt combination, or ''.
+
+        The prompt hash is embedded in the filename so that a changed prompt
+        automatically produces a cache miss and triggers regeneration.
+
+        The cache file is looked up in self.data_dir as
+        'change-summary-<from_version>-to-<to_version>-<prompt_hash>.txt'.
+
+        :param from_version: Identifier of the older snapshot; interpolated into the filename as-is.
+        :param to_version: Identifier of the newer snapshot; interpolated into the filename as-is.
+        :param prompt: Summary prompt whose hash forms part of the cache key.
+        :return: The file contents with surrounding whitespace stripped, or '' if no cache file exists.
+        """
+        prompt_hash = self._llm_summary_prompt_hash(prompt)
+        fname = os.path.join(self.data_dir, f'change-summary-{from_version}-to-{to_version}-{prompt_hash}.txt')
+        if not os.path.isfile(fname):
+            logger.debug(f"LLM cached diff summary '{fname}' NOT found")
+            return ''
+        with open(fname, 'r', encoding='utf-8') as f:
+            logger.debug(f"LLM cached diff summary '{fname}' FOUND")
+            return f.read().strip()
+
+    def save_llm_diff_summary(self, summary: str, from_version, to_version, prompt: str = ''):
+        """Persist the AI Change Summary keyed by version pair + prompt hash."""
+        self.ensure_data_dir_exists()
+        prompt_hash = self._llm_summary_prompt_hash(prompt)
+        fname = os.path.join(self.data_dir, f'change-summary-{from_version}-to-{to_version}-{prompt_hash}.txt')
+        tmp = fname + '.tmp'
+        try:
+            with open(tmp, 'w', encoding='utf-8') as f:
+                f.write(summary)
+            os.replace(tmp, fname)
+        except OSError as e:
+            logger.warning(f"Could not write LLM summary cache {fname}: {e}")
+
    def pause(self):
        self['paused'] = True

@@ -974,6 +1066,7 @@ class model(EntityPersistenceMixin, watch_base):
        Prepare watch data for commit.

        Excludes processor_config_* keys (stored in separate files).
+        Excludes __-prefixed keys (transient in-memory state — must not persist to disk).
        Normalizes browser_steps to empty list if no meaningful steps.
        """
        import copy
@@ -987,8 +1080,11 @@ class model(EntityPersistenceMixin, watch_base):
        else:
            snapshot = dict(self)

-        # Exclude processor config keys (stored separately)
-        watch_dict = {k: copy.deepcopy(v) for k, v in snapshot.items() if not k.startswith('processor_config_')}
+        # Exclude processor config keys (stored separately) and __-prefixed transient keys
+        watch_dict = {
+            k: copy.deepcopy(v) for k, v in snapshot.items()
+            if not k.startswith('processor_config_') and not k.startswith('__')
+        }

        # Normalize browser_steps: if no meaningful steps, save as empty list
        if not self.has_browser_steps:
@@ -186,8 +186,13 @@ class watch_base(dict):
            'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
            'content-type': None,
            'date_created': None,
+            'extract_lines_containing': [],  # Keep only lines containing these substrings (plain text, case-insensitive)
            'extract_text': [],  # Extract text by regex after filters
-            'browser_profile': 'system',  # machine-name key of a BrowserProfile; 'system' → resolve via chain
+            # LLM intent-based evaluation
+            'llm_intent': '',                # Plain-English description of what the user cares about (change filter)
+            'llm_change_summary': '',        # Prompt for AI Change Summary — replaces {{ diff }} in notifications
+            'llm_prefilter': None,           # CSS selector derived at setup time (semantic only, e.g. "footer")
+            'llm_evaluation_cache': {},      # {sha256(intent+diff): {important, summary}} - evaluated once, cached
            'fetch_backend': 'system',  # plaintext, playwright etc
            'fetch_time': 0.0,
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
@@ -330,20 +335,22 @@ class watch_base(dict):
        if self.__watch_was_edited:
            return  # Already marked as edited

+        # __-prefixed keys are transient in-memory state (e.g. __check_status set by
+        # set_watch_minitext_status). They never persist to disk and must not trigger
+        # the edited flag — otherwise just observing a check in progress would force
+        # the next run to bypass the unchanged-content skip.
+        if isinstance(key, str) and key.startswith('__'):
+            return
+
        # Import from shared schema utilities (no circular dependency)
-        from .schema_utils import get_readonly_watch_fields
-        readonly_fields = get_readonly_watch_fields()
+        from .schema_utils import get_readonly_watch_fields, SYSTEM_MANAGED_NON_SPEC_FIELDS

-        # Additional system-managed fields not in OpenAPI spec (yet)
-        # These are set by processors/workers and should not trigger edited flag
-        additional_system_fields = {
-            'last_check_status',  # Set by processors
-            'restock',  # Set by restock processor
-            'last_viewed',  # Set by mark_all_viewed endpoint
-        }
-
-        # Only mark as edited if this is a user-writable field
-        if key not in readonly_fields and key not in additional_system_fields:
+        # `last_viewed` is set internally by mark_all_viewed and shouldn't flag the watch as
+        # edited, but is not in SYSTEM_MANAGED_NON_SPEC_FIELDS because it IS user-writable via
+        # the UpdateWatch schema (the API path).
+        if (key not in get_readonly_watch_fields()
+                and key != 'last_viewed'
+                and key not in SYSTEM_MANAGED_NON_SPEC_FIELDS):
            self.__watch_was_edited = True

    def __setitem__(self, key, value):
@@ -590,9 +597,7 @@ class watch_base(dict):
            return None

        try:
-            # _datastore is a ChangeDetectionStore (has .data) or a plain dict (unit tests)
-            store_data = self._datastore.data if hasattr(self._datastore, 'data') else self._datastore
-            value = store_data['settings']
+            value = self._datastore['settings']
            for key in path:
                value = value[key]
            return value
@@ -1,380 +0,0 @@
-"""
-BrowserProfile — named, reusable browser/fetcher configuration.
-
-Storage key
-----------
-Profiles are stored in ``settings.application.browser_profiles`` as a plain dict
-keyed by *machine name* — a lowercase, underscore-separated slug derived from the
-human-readable ``name`` field:
-
-    'My Blocking Chrome'        →  'my_blocking_chrome'
-    'Custom CDP — Mobile (375px)' →  'custom_cdp_mobile_375px'
-
-Using the machine name as the key means that deleting a profile and recreating
-it with the same name restores the original key, so all watches that referenced
-it continue to work without any manual re-linking.
-
-Resolution chain
----------------
-``resolve_browser_profile(watch, datastore)`` walks:
-
-    watch.browser_profile  →  first tag with overrides_watch=True  →
-    settings.application.browser_profile  →  built-in fallback
-
-It never raises.  Stale / missing machine-name references are logged and the
-resolver falls through to the next level.
-
-Built-in profiles
-----------------
-``BUILTIN_REQUESTS`` and ``BUILTIN_BROWSER`` are always available and cannot be
-deleted from the UI (``is_builtin=True``).  Their machine names are stored in
-``RESERVED_MACHINE_NAMES`` to block user profiles from shadowing them.
-
-Migration
---------
-``store/updates.py::update_31`` converts the legacy ``fetch_backend`` field on
-watches, tags and global settings into ``browser_profile`` machine-name
-references.  After that migration no legacy paths are needed here.
-"""
-
-from __future__ import annotations
-
-import os
-import re
-from typing import Optional
-
-from loguru import logger
-from pydantic import BaseModel, field_validator
-
-# Default User-Agent for the built-in plaintext requests profile.
-# Overridable via environment variable for deployments that need a custom UA.
-_DEFAULT_REQUESTS_UA = os.getenv(
-    "DEFAULT_SETTINGS_HEADERS_USERAGENT",
-    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
-)
-
-# ---------------------------------------------------------------------------
-# Constants
-# ---------------------------------------------------------------------------
-
-NAME_MAX_LEN = 100
-
-
-# ---------------------------------------------------------------------------
-# Model
-# ---------------------------------------------------------------------------
-
-class BrowserProfile(BaseModel):
-    """
-    A named, reusable configuration for how a watch fetches its target URL.
-
-    The *machine name* (see ``get_machine_name()``) is the stable storage key.
-    Updating ``name`` changes the machine name; any watch that referenced the
-    old machine name will then fall back through the resolution chain until it
-    is explicitly re-pointed.  To replace a profile without breaking watches,
-    delete it and recreate it with the *same* name.
-    """
-
-    name: str
-    """Human-readable label shown in the UI.  Max 100 characters."""
-
-    fetch_backend: str = 'requests'
-    """
-    Which fetch engine to use.  This is the *clean* fetcher name without the
-    ``html_`` module prefix (e.g. ``'requests'``, ``'webdriver'``,
-    ``'playwright'``, ``'puppeteer'``, ``'cloakbrowser'``).
-
-    The module-level ``html_`` prefix (``html_requests``, ``html_webdriver``,
-    …) is an implementation detail of ``content_fetchers/``.  Use
-    ``get_fetcher_class_name()`` to obtain the full module attribute name when
-    you need to look up the class.
-
-    Must be non-empty and contain only ``[a-z0-9_]`` characters.
-    """
-
-    is_builtin: bool = False
-    """Built-in profiles are always present and cannot be deleted from the UI."""
-
-    # ------------------------------------------------------------------
-    # Browser-specific settings (silently ignored by html_requests)
-    # ------------------------------------------------------------------
-
-    browser_connection_url: Optional[str] = None
-    """
-    Custom CDP / WebSocket endpoint, e.g. ``ws://my-chrome:3000``.
-    Overrides the system-wide ``PLAYWRIGHT_DRIVER_URL`` for this profile.
-    Only meaningful for ``html_webdriver`` profiles.
-    """
-
-    viewport_width: int = 1280
-    """
-    Browser viewport width in pixels.
-    Common presets: 375 (iPhone), 768 (tablet), 1280 (desktop).
-    """
-
-    viewport_height: int = 1000
-    """
-    Browser viewport height in pixels.
-    Common presets: 812 (iPhone), 1024 (tablet), 1000 (desktop).
-    """
-
-    block_images: bool = False
-    """
-    Block all image requests.  Typically cuts page-load time by 40-70 % on
-    image-heavy sites with no impact on text-based change detection.
-    """
-
-    block_fonts: bool = False
-    """Block web-font requests.  Modest speed gain; rarely affects detection."""
-
-    user_agent: Optional[str] = None
-    """
-    Override the browser User-Agent string.
-    ``None`` keeps the fetcher's built-in default, which already strips
-    obvious headless markers such as ``HeadlessChrome``.
-    """
-
-    ignore_https_errors: bool = False
-    """
-    Proceed even when the server's TLS certificate is invalid or self-signed.
-    Useful for staging / development environments.
-    """
-
-    locale: Optional[str] = None
-    """
-    Browser locale (e.g. ``en-US``, ``de-DE``).
-    Sets the ``Accept-Language`` header and ``navigator.language``.
-    Some sites serve different prices or copy based on locale.
-    """
-
-    custom_headers: str = ''
-    """
-    Extra HTTP headers sent with every request using this profile, in ``Key: Value`` format
-    (one per line, ``#`` lines are ignored).  Applied before per-watch headers so
-    individual watches can override them.
-    """
-
-    service_workers: str = 'allow'
-    """
-    Whether to allow Service Workers in the browser context.
-    Playwright accepts ``'allow'`` or ``'block'``.
-    Block to avoid large Service Worker data transfers (e.g. YouTube).
-    """
-
-    extra_delay: int = 0
-    """
-    Extra seconds to wait after page load before extracting content
-    (on top of the per-watch ``render_extract_delay``).
-    Sourced from ``WEBDRIVER_DELAY_BEFORE_CONTENT_READY`` at startup.
-    """
-
-    model_config = {"frozen": False}
-
-    # ------------------------------------------------------------------
-    # Validators
-    # ------------------------------------------------------------------
-
-    @field_validator('fetch_backend')
-    @classmethod
-    def _validate_fetch_backend(cls, v: str) -> str:
-        v = v.strip()
-        if not v:
-            raise ValueError('fetch_backend cannot be empty')
-        if not re.fullmatch(r'[a-z0-9_]+', v):
-            raise ValueError(
-                f"fetch_backend must contain only lowercase letters, digits and underscores, got {v!r}"
-            )
-        if v.startswith('html_'):
-            raise ValueError(
-                f"fetch_backend should be the clean fetcher name without the 'html_' prefix "
-                f"(e.g. 'requests', 'webdriver', 'playwright'). Got {v!r}. "
-                f"Use get_fetcher_class_name() to obtain the full module attribute name."
-            )
-        return v
-
-    @field_validator('name')
-    @classmethod
-    def _validate_name(cls, v: str) -> str:
-        v = v.strip()
-        if not v:
-            raise ValueError('Name cannot be empty')
-        if len(v) > NAME_MAX_LEN:
-            raise ValueError(f'Name must be {NAME_MAX_LEN} characters or less')
-        return v
-
-    # ------------------------------------------------------------------
-    # Machine-name helpers
-    # ------------------------------------------------------------------
-
-    @staticmethod
-    def machine_name_from_str(name: str) -> str:
-        """
-        Convert a human name to a machine-safe storage key.
-
-        Transformation rules (applied in order):
-
-        1. Strip surrounding whitespace; lower-case.
-        2. Replace runs of whitespace or hyphens with a single ``_``.
-        3. Drop every character that is not ``[a-z0-9_]``.
-        4. Collapse consecutive underscores.
-        5. Strip leading / trailing underscores.
-        6. Truncate to ``NAME_MAX_LEN`` characters.
-
-        Examples::
-
-            'My Blocking Browser Chrome'  →  'my_blocking_browser_chrome'
-            'Custom CDP — Mobile (375px)' →  'custom_cdp_mobile_375px'
-            '  Weird   ---   Name  '      →  'weird_name'
-        """
-        s = name.strip().lower()
-        s = re.sub(r'[\s\-]+', '_', s)    # whitespace / hyphens → underscore
-        s = re.sub(r'[^a-z0-9_]', '', s)  # drop everything else
-        s = re.sub(r'_+', '_', s)         # collapse repeated underscores
-        s = s.strip('_')                   # drop leading / trailing underscores
-        return s[:NAME_MAX_LEN]
-
-    def get_machine_name(self) -> str:
-        """Return the machine-safe storage key derived from this profile's ``name``."""
-        return self.machine_name_from_str(self.name)
-
-    def get_fetcher_class_name(self) -> str:
-        """Return the clean fetcher name for this profile (same as ``fetch_backend``).
-
-        Use with ``content_fetchers.get_fetcher()``::
-
-            from changedetectionio import content_fetchers
-            fetcher_cls = content_fetchers.get_fetcher(profile.get_fetcher_class_name())
-        """
-        return self.fetch_backend
-
-
-# ---------------------------------------------------------------------------
-# Built-in profiles (always present, cannot be deleted)
-# ---------------------------------------------------------------------------
-
-BUILTIN_REQUESTS = BrowserProfile(
-    name='Direct HTTP (requests)',
-    fetch_backend='requests',
-    is_builtin=True,
-    user_agent=_DEFAULT_REQUESTS_UA,
-)
-
-BUILTIN_PLAYWRIGHT = BrowserProfile(
-    name='Browser (Chrome/Playwright)',
-    fetch_backend='playwright_cdp',
-    is_builtin=True,
-)
-
-BUILTIN_SELENIUM = BrowserProfile(
-    name='Browser (Chrome/Selenium)',
-    fetch_backend='selenium',
-    is_builtin=True,
-)
-
-BUILTIN_PUPPETEER = BrowserProfile(
-    name='Browser (Chrome/Puppeteer)',
-    fetch_backend='puppeteer',
-    is_builtin=True,
-)
-
-# Backwards-compatible alias — code that imported BUILTIN_BROWSER keeps working.
-BUILTIN_BROWSER = BUILTIN_PLAYWRIGHT
-
-# Keyed by machine name for O(1) lookup.
-_BUILTINS: dict[str, BrowserProfile] = {
-    b.get_machine_name(): b
-    for b in (BUILTIN_REQUESTS, BUILTIN_PLAYWRIGHT, BUILTIN_SELENIUM, BUILTIN_PUPPETEER)
-}
-
-# Machine names that cannot be used by user-created profiles.
-RESERVED_MACHINE_NAMES: frozenset[str] = frozenset(_BUILTINS.keys())
-
-
-def get_default_browser_builtin() -> BrowserProfile:
-    """Final fallback when no profile can be resolved through the chain.
-
-    ``preconfigure_browser_profiles_based_on_env()`` sets
-    ``settings.application.browser_profile`` explicitly at startup, so this
-    fallback is only reached for watches with stale / missing machine-name
-    references.  Safe default is always direct HTTP requests.
-    """
-    return BUILTIN_REQUESTS
-
-
-# ---------------------------------------------------------------------------
-# Lookup helpers
-# ---------------------------------------------------------------------------
-
-def get_builtin_profiles() -> dict[str, BrowserProfile]:
-    """Return a shallow copy of the built-in profiles dict (keyed by machine name)."""
-    return dict(_BUILTINS)
-
-
-def get_profile(machine_name: str, store_profiles: dict) -> Optional[BrowserProfile]:
-    """
-    Look up a ``BrowserProfile`` by machine name.
-
-    Stored profiles are checked first so that env-configured built-ins (written
-    by ``preconfigure_browser_profiles_based_on_env``) take priority over the
-    bare module-level defaults.  Falls back to ``_BUILTINS`` when no stored
-    version exists.
-
-    Returns ``None`` when the machine name is unknown or the stored data is
-    corrupt (a warning is logged in the latter case).
-    """
-    raw = store_profiles.get(machine_name)
-    if raw is not None:
-        if isinstance(raw, BrowserProfile):
-            return raw
-        try:
-            return BrowserProfile(**raw)
-        except Exception as exc:
-            logger.warning(f"BrowserProfile '{machine_name}': failed to deserialize — {exc}")
-            # Fall through to built-in
-
-    if machine_name in _BUILTINS:
-        return _BUILTINS[machine_name]
-
-    return None
-
-
-# ---------------------------------------------------------------------------
-# Resolution
-# ---------------------------------------------------------------------------
-
-def resolve_browser_profile(watch, datastore) -> BrowserProfile:
-    """
-    Resolve the effective ``BrowserProfile`` for *watch*.
-
-    Resolution chain
-    ~~~~~~~~~~~~~~~~
-    1. ``watch['browser_profile']`` — explicit machine name set on the watch.
-    2. First tag with ``overrides_watch=True`` that has ``browser_profile`` set.
-    3. ``settings.application['browser_profile']`` — system-wide default.
-    4. Built-in fallback: ``BUILTIN_REQUESTS`` (requests is always the safe default).
-
-    Never raises.  A stale / missing machine-name reference produces a
-    ``logger.warning`` and the resolver continues down the chain.
-    """
-    from changedetectionio.model.resolver import resolve_setting
-
-    store_profiles: dict = datastore.data['settings']['application'].get('browser_profiles', {})
-
-    machine_name = resolve_setting(
-        watch, datastore,
-        field_name='browser_profile',
-        sentinel_values={'system', 'default', ''},
-        default=None,
-        require_tag_override=True,
-    )
-
-    if machine_name:
-        profile = get_profile(machine_name, store_profiles)
-        if profile:
-            return profile
-        logger.warning(
-            f"Watch {watch.get('uuid')!r}: browser_profile {machine_name!r} not found, "
-            f"falling back through the chain"
-        )
-
-    return get_default_browser_builtin()
@@ -1,63 +0,0 @@
-"""
-Unified Watch → Tag → Global settings cascade resolver.
-
-All settings resolution follows the same priority order:
-  1. Watch-level setting (if set and not a sentinel "use parent" value)
-  2. First tag with overrides_watch=True that has the field set
-  3. Global application settings
-  4. Caller-supplied default
-
-This replaces the previously scattered manual resolution loops found in
-notification_service.py, processors/base.py, and the restock processor.
-"""
-
-
-def resolve_setting(watch, datastore, field_name, *,
-                    sentinel_values=None,
-                    default=None,
-                    require_tag_override=True):
-    """
-    Resolve a single setting value by walking the Watch → Tag → Global chain.
-
-    Args:
-        watch:               Watch dict / model object.
-        datastore:           App datastore (must have get_all_tags_for_watch() and
-                             data['settings']['application']).
-        field_name:          The setting key to look up at each level.
-        sentinel_values:     Set of values that mean "not configured here, keep looking".
-                             For example {'system'} for fetch_backend.
-        default:             Value returned when nothing is found in the chain.
-        require_tag_override: If True (default), only tags where overrides_watch=True
-                             contribute to the cascade.  Set to False when every tag
-                             that carries the field should be considered (e.g. for
-                             fields that make sense to merge/override at any tag level).
-
-    Returns:
-        The first non-sentinel, non-empty value found, or *default*.
-    """
-    _sentinels = set(sentinel_values) if sentinel_values else set()
-
-    def _is_unset(v):
-        return v is None or v == '' or v in _sentinels
-
-    # 1. Watch level
-    v = watch.get(field_name)
-    if not _is_unset(v):
-        return v
-
-    # 2. Tag level
-    tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
-    if tags:
-        for tag in tags.values():
-            if require_tag_override and not tag.get('overrides_watch'):
-                continue
-            v = tag.get(field_name)
-            if not _is_unset(v):
-                return v
-
-    # 3. Global application settings
-    v = datastore.data['settings']['application'].get(field_name)
-    if not _is_unset(v):
-        return v
-
-    return default
@@ -8,6 +8,35 @@ Shared by both the model layer and API layer to avoid circular dependencies.
 import functools


+# Watch fields written by workers/processors that are NOT part of the public OpenAPI spec.
+#
+# These fields exist on a watch dict at runtime but are internal implementation details
+# (skip-cache hashes, last-check status strings, LLM runtime state, etc.). Used by:
+#   - model/__init__.py: don't trigger the "edited" flag when these are written internally
+#   - api/Watch.py: strip from GET responses and silently discard from PUT/POST inputs
+#                   so that a GET → PUT round trip doesn't trip the unknown-field validator
+#
+# `last_viewed` is intentionally NOT included: it's set internally by mark_all_viewed BUT
+# is also explicitly writable via the UpdateWatch schema (see api/Watch.py valid_fields).
+SYSTEM_MANAGED_NON_SPEC_FIELDS = frozenset({
+    'last_check_status',           # Set by processors
+    'last_filter_config_hash',     # text_json_diff internal skip-cache
+    'restock',                     # Set by restock processor
+    '_llm_result',                 # LLM runtime — populated by evaluator
+    '_llm_intent',                 # LLM runtime — presumably the intent used for the evaluation (TODO confirm)
+    '_llm_change_summary',         # LLM runtime — presumably the generated AI Change Summary text (TODO confirm)
+    'llm_prefilter',               # CSS selector derived at setup time (per its watch_base default)
+    'llm_evaluation_cache',        # Cached LLM evaluation results (per its watch_base default)
+    'llm_last_tokens_used',        # presumably token usage of the most recent LLM call — TODO confirm
+    'llm_tokens_used_cumulative',  # presumably running total of LLM tokens used — TODO confirm
+})
+
+
+def get_system_managed_non_spec_fields():
+    """Return the set of internal fields not in the public OpenAPI spec.
+
+    Returns:
+        The module-level ``SYSTEM_MANAGED_NON_SPEC_FIELDS`` frozenset itself
+        (the same immutable object, not a copy).
+    """
+    return SYSTEM_MANAGED_NON_SPEC_FIELDS
+
+
@functools.cache
 def get_openapi_schema_dict():
    """
@@ -48,8 +48,9 @@ To verify this works:
 """

 import json
+import os
 import re
-from urllib.parse import unquote_plus
+from urllib.parse import unquote_plus, urlparse

 import requests
 from apprise import plugins
@@ -59,6 +60,8 @@ from apprise.utils.logic import dict_full_update
 from loguru import logger
 from requests.structures import CaseInsensitiveDict

+from changedetectionio.validate_url import is_private_hostname, is_url_private_or_parser_confused
+
 SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}


@@ -195,6 +198,17 @@ def apprise_http_custom_handler(

    url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))

+    # SSRF protection — block private/loopback addresses unless explicitly allowed.
+    # Uses parser-agnostic check so urlparse/urllib3 differentials (GHSA-rph4-96w6-q594)
+    # can't smuggle an internal target past the gate.
+    # The gate is skipped only when ALLOW_IANA_RESTRICTED_ADDRESSES is one of
+    # 'true' / '1' / 'yes' (case-insensitive); unset or any other value keeps it on.
+    if os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', '').lower() not in ('true', '1', 'yes'):
+        if is_url_private_or_parser_confused(url):
+            raise ValueError(
+                f"Notification target '{url}' is a private/reserved address "
+                f"or contains a parser-differential payload. "
+                f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
+            )
+
    response = requests.request(
        method=method,
        url=url,
@@ -65,6 +65,9 @@ def notification_format_align_with_apprise(n_format : str):
    :return:
    """

+    if not n_format:
+        return NotifyFormat.TEXT.value
+
    if n_format.startswith('html'):
        # Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here
        n_format = NotifyFormat.HTML.value
@@ -259,9 +262,12 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
    elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
          or url.startswith('https://discord.com/api'))\
            and 'html' in requested_output_format:
-        # Discord doesn't support HTML, replace <br> with newlines
+        # Discord doesn't render HTML — convert markup to plain text equivalents.
+        # &nbsp; is injected upstream to preserve double-spaces for HTML email clients;
+        # Discord displays it as the literal string "&nbsp;" so strip it here.
        n_body = n_body.strip().replace('<br>', '\n')
        n_body = n_body.replace('</br>', '\n')
+        n_body = n_body.replace('&nbsp;', ' ')
        n_body = newline_re.sub('\n', n_body)

        # Don't replace placeholders or truncate here - let the custom Discord plugin handle it
@@ -358,9 +364,55 @@ def process_notification(n_object: NotificationContextData, datastore):
        # Should always be false for 'text' mode or its too hard to read
        # But otherwise, this could be some setting
        word_diff=False if requested_output_format_original == 'text' else True,
+        # HTML-format notifications must escape diff content (GHSA-q8xq-qg4x-wphg).
+        # FormattableDiff/Extract escape internally so {{ diff(...) }} stays callable —
+        # the post-Jinja escape loop below would otherwise convert them to plain str.
+        escape_output='html' in requested_output_format,
        )
    )

+    # {{ raw_diff }} always holds the actual diff regardless of AI Change Summary
+    n_object['raw_diff'] = n_object.get('diff', '')
+
+    # AI Change Summary: optionally replace {{ diff }} with the AI summary
+    _llm_change_summary = (n_object.get('_llm_change_summary') or '').strip()
+    from changedetectionio.llm.evaluator import get_llm_settings
+    _override_diff = get_llm_settings(datastore).override_diff_with_summary
+    if _llm_change_summary and _override_diff:
+        n_object['diff'] = _llm_change_summary
+
+    # Lazily populate llm_summary / llm_intent if used in notification template
+    # `or ''` rather than a .get() default: the default only covers a missing key, so a
+    # key that is present but None would make the string concatenation raise TypeError.
+    scan_text = (n_object.get('notification_body') or '') + (n_object.get('notification_title') or '')
+    if 'llm_summary' in scan_text or 'llm_intent' in scan_text or 'raw_diff' in scan_text:
+        n_object['llm_summary'] = _llm_change_summary or (n_object.get('_llm_result') or {}).get('summary', '')
+        n_object['llm_intent'] = n_object.get('_llm_intent', '')
+
+    # Escape diff/snapshot variables before Jinja renders them into an HTML notification.
+    # GHSA-q8xq-qg4x-wphg: inscriptis decodes HTML entities when converting text/html
+    # pages to snapshot text, so a page that visibly displays "&lt;a href...&gt;" yields
+    # literal "<a href...>" in the snapshot — which would otherwise render as live
+    # markup in HTML emails / Telegram (parse_mode=html) / Discord embeds, letting a
+    # watched page inject phishing links into the operator's notification channel.
+    # Also covers #3529 — raw '<' chars from text/plain pages breaking HTML email layout.
+    # The operator's own template HTML (e.g. <a href="{{watch_url}}">) is outside the
+    # variable values so it stays untouched. Diff placemarkers contain no HTML chars,
+    # so they survive escape and are still replaced with <span> tags later.
+    if 'html' in requested_output_format:
+        from markupsafe import escape as html_escape
+        from changedetectionio.notification_service import FormattableDiff, FormattableExtract
+        _page_content_keys = {'raw_diff', 'current_snapshot', 'prev_snapshot', 'triggered_text'}
+        for key in [k for k in notification_parameters if k.startswith('diff') or k in _page_content_keys]:
+            value = notification_parameters.get(key)
+            if not value:
+                continue
+            # FormattableDiff / FormattableExtract are callable str subclasses — {{ diff(lines=5) }}
+            # etc. relies on __call__. Wrapping them with str(html_escape(...)) here would lose
+            # __call__ and break those tokens. They escape internally via escape_output=True
+            # (set by add_rendered_diff_to_notification_vars above) for both __str__ and __call__.
+            if isinstance(value, (FormattableDiff, FormattableExtract)):
+                continue
+            notification_parameters[key] = str(html_escape(str(value)))
+
    with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
        for url in n_object['notification_urls']:

@@ -378,13 +430,6 @@ def process_notification(n_object: NotificationContextData, datastore):
            logger.info(f">> Process Notification: AppRise start notifying '{url}'")
            url = jinja_render(template_str=url, **notification_parameters)

-            # If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped
-            watch_mime_type = n_object.get('watch_mime_type')
-            if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower():
-                if 'html' in requested_output_format:
-                    from markupsafe import escape
-                    n_body = str(escape(n_body))
-
            if 'html' in requested_output_format:
                # Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output
                # But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and&nbsp;this" etc which is too much.
@@ -1,3 +0,0 @@
-from .registry import registry, NotificationProfileType, AppriseProfileType
-
-__all__ = ['registry', 'NotificationProfileType', 'AppriseProfileType']
@@ -1,73 +0,0 @@
-"""
-Per-profile notification log.
-
-Each profile gets its own log file at:
-  {datastore_path}/notification-logs/{profile_uuid}.log
-
-Entries are stored as JSON-lines (one JSON object per line).
-The file is capped at MAX_ENTRIES lines (oldest pruned first).
-"""
-
-import json
-import os
-from datetime import datetime, timezone
-
-MAX_ENTRIES = 100
-_LOG_DIR = 'notification-logs'
-
-
-def _log_file(datastore_path: str, profile_uuid: str) -> str:
-    return os.path.join(datastore_path, _LOG_DIR, f'{profile_uuid}.log')
-
-
-def write_profile_log(datastore_path: str, profile_uuid: str, *,
-                      watch_url: str = '',
-                      watch_uuid: str = '',
-                      status: str,        # 'ok' | 'error' | 'test'
-                      message: str = ''):
-    """Append one log entry; prune to MAX_ENTRIES."""
-    log_dir = os.path.join(datastore_path, _LOG_DIR)
-    os.makedirs(log_dir, exist_ok=True)
-
-    entry = json.dumps({
-        'ts':         datetime.now(tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
-        'watch_url':  watch_url[:200],
-        'watch_uuid': watch_uuid,
-        'status':     status,
-        'message':    message[:500],
-    }, ensure_ascii=False)
-
-    path = _log_file(datastore_path, profile_uuid)
-    try:
-        with open(path, 'r', encoding='utf-8') as fh:
-            lines = [l for l in fh.read().splitlines() if l.strip()]
-    except FileNotFoundError:
-        lines = []
-
-    lines.append(entry)
-    lines = lines[-MAX_ENTRIES:]
-
-    with open(path, 'w', encoding='utf-8') as fh:
-        fh.write('\n'.join(lines) + '\n')
-
-
-def read_profile_log(datastore_path: str, profile_uuid: str) -> list:
-    """Return log entries as a list of dicts, newest first."""
-    path = _log_file(datastore_path, profile_uuid)
-    try:
-        with open(path, 'r', encoding='utf-8') as fh:
-            lines = [l.strip() for l in fh if l.strip()]
-    except FileNotFoundError:
-        return []
-
-    entries = []
-    for line in reversed(lines):
-        try:
-            entries.append(json.loads(line))
-        except (json.JSONDecodeError, ValueError):
-            pass
-    return entries
-
-
-def has_log(datastore_path: str, profile_uuid: str) -> bool:
-    return os.path.exists(_log_file(datastore_path, profile_uuid))
@@ -1,111 +0,0 @@
-"""
-Notification Profile Type plugin registry.
-
-NotificationProfileType is the abstract base — the only contract is send().
-Plugins are free to use any delivery mechanism (Apprise, direct HTTP, SDK, etc.).
-
-Built-in: AppriseProfileType (raw Apprise URL list).
-
-Third-party plugins register additional types:
-
-    from changedetectionio.notification_profiles.registry import registry, NotificationProfileType
-
-    @registry.register
-    class MyProfileType(NotificationProfileType):
-        type_id      = "mytype"
-        display_name = "My Service"
-        icon         = "bell"
-        template     = "my_plugin/notification_profiles/types/mytype.html"
-
-        def send(self, config: dict, n_object: dict, datastore) -> bool:
-            requests.post(config['webhook_url'], json={"text": n_object['notification_body']})
-            return True
-"""
-
-from abc import ABC, abstractmethod
-
-
-class NotificationProfileType(ABC):
-    type_id:      str = NotImplemented
-    display_name: str = NotImplemented
-    icon:         str = "bell"          # feather icon name
-    template:     str = NotImplemented  # Jinja2 partial rendered in the profile edit form
-
-    @abstractmethod
-    def send(self, config: dict, n_object: dict, datastore) -> bool:
-        """
-        Deliver the notification.
-
-        Args:
-            config:    The profile's config dict (type-specific fields).
-            n_object:  Fully-rendered NotificationContextData (title, body, format, etc.).
-            datastore: App datastore for any extra lookups.
-
-        Returns True on success, False on failure (do not raise — log instead).
-        """
-
-    def validate(self, config: dict) -> None:
-        """Raise ValueError with a user-readable message on invalid config."""
-        pass
-
-    def get_url_hint(self, config: dict) -> str:
-        """Short display string shown in the selector chip tooltip / dropdown row."""
-        return ''
-
-
-class AppriseProfileType(NotificationProfileType):
-    """Delivers notifications via Apprise using a raw URL list."""
-
-    type_id      = "apprise"
-    display_name = "Apprise"
-    icon         = "bell"
-    template     = "notification_profiles/types/apprise.html"
-
-    def get_apprise_urls(self, config: dict) -> list:
-        return config.get('notification_urls') or []
-
-    def send(self, config: dict, n_object, datastore) -> bool:
-        from changedetectionio.notification.handler import process_notification
-        from changedetectionio.notification_service import NotificationContextData
-        urls = self.get_apprise_urls(config)
-        if not urls:
-            return False
-        if not isinstance(n_object, NotificationContextData):
-            n_object = NotificationContextData(n_object)
-        n_object['notification_urls']   = urls
-        n_object['notification_title']  = config.get('notification_title') or n_object.get('notification_title')
-        n_object['notification_body']   = config.get('notification_body')  or n_object.get('notification_body')
-        n_object['notification_format'] = config.get('notification_format') or n_object.get('notification_format')
-        process_notification(n_object, datastore)
-        return True
-
-    def get_url_hint(self, config: dict) -> str:
-        urls = config.get('notification_urls') or []
-        if urls:
-            u = urls[0]
-            return (u[:60] + '…') if len(u) > 60 else u
-        return ''
-
-
-class _Registry:
-    def __init__(self):
-        self._types: dict = {}
-
-    def register(self, cls):
-        """Register a NotificationProfileType subclass. Usable as a decorator."""
-        instance = cls()
-        self._types[instance.type_id] = instance
-        return cls
-
-    def get(self, type_id: str) -> NotificationProfileType:
-        return self._types.get(type_id, self._types.get('apprise'))
-
-    def all(self) -> list:
-        return list(self._types.values())
-
-    def choices(self) -> list:
-        return [(t.type_id, t.display_name) for t in self._types.values()]
-
-
-registry = _Registry()
-registry.register(AppriseProfileType)
@@ -1,49 +0,0 @@
-"""
-Resolve the full set of NotificationProfile objects that should fire for a given watch.
-
-Merges profile UUIDs from: Watch → Tags → System (union, deduplicated).
-Mute cascade is checked separately via resolve_setting() before calling this.
-"""
-
-from loguru import logger
-
-
-def resolve_notification_profiles(watch, datastore) -> list:
-    """
-    Return list of (profile_dict, NotificationProfileType) tuples to fire for *watch*.
-
-    Profiles are deduplicated by UUID — if the same UUID appears at multiple levels
-    it fires once, not multiple times.
-    """
-    from changedetectionio.notification_profiles.registry import registry
-
-    all_profiles = datastore.data['settings']['application'].get('notification_profile_data', {})
-
-    seen = set()
-    result = []
-
-    def _add(uuids):
-        for uid in (uuids or []):
-            if uid in seen:
-                continue
-            profile = all_profiles.get(uid)
-            if not profile:
-                logger.warning(f"Notification profile UUID {uid!r} not found, skipping")
-                continue
-            seen.add(uid)
-            type_handler = registry.get(profile.get('type', 'apprise'))
-            result.append((profile, type_handler))
-
-    # 1. Watch-level
-    _add(watch.get('notification_profiles', []))
-
-    # 2. Tag/group level
-    tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
-    if tags:
-        for tag in tags.values():
-            _add(tag.get('notification_profiles', []))
-
-    # 3. System level
-    _add(datastore.data['settings']['application'].get('notification_profiles', []))
-
-    return result
@@ -29,7 +29,7 @@ def _check_cascading_vars(datastore, var_name, watch):
    v = watch.get(var_name)
    if v and not watch.get('notification_muted'):
        if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
-            return datastore.data['settings']['application'].get('notification_format')
+            return datastore.data['settings']['application'].get('notification_format') or default_notification_format

        return v

@@ -88,6 +88,35 @@ class FormattableTimestamp(str):
            return self._dt.isoformat()


+class FormattableExtract(str):
+    """
+    A str subclass that holds only the extracted changed fragments from a diff.
+    Used for {{diff_changed_from}} and {{diff_changed_to}} tokens.
+
+        {{ diff_changed_from }}   → old value(s) only, e.g. "$99.99"
+        {{ diff_changed_to }}     → new value(s) only, e.g. "$109.99"
+
+    Multiple changed fragments are joined with newlines.
+    Being a str subclass means it is natively JSON serializable.
+    """
+    def __new__(cls, prev_snapshot, current_snapshot, extract_fn, escape_output=False):
+        if prev_snapshot or current_snapshot:
+            from changedetectionio import diff as diff_module
+            # word_diff=True is required — placemarker extraction regexes only exist in word-diff output
+            raw = diff_module.render_diff(prev_snapshot or '', current_snapshot or '', word_diff=True)
+            extracted = extract_fn(raw)
+        else:
+            extracted = ''
+        if escape_output and extracted:
+            # Placemarkers (@removed_PLACEMARKER_OPEN etc) contain no HTML chars,
+            # so html_escape leaves them intact — they still get swapped to <span>
+            # tags later by apply_service_tweaks. See GHSA-q8xq-qg4x-wphg.
+            from markupsafe import escape as html_escape
+            extracted = str(html_escape(extracted))
+        instance = super().__new__(cls, extracted)
+        return instance
+
+
 class FormattableDiff(str):
    """
    A str subclass representing a rendered diff. As a plain string it renders
@@ -105,16 +134,23 @@ class FormattableDiff(str):

    Being a str subclass means it is natively JSON serializable.
    """
-    def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
+    def __new__(cls, prev_snapshot, current_snapshot, escape_output=False, **base_kwargs):
        if prev_snapshot or current_snapshot:
            from changedetectionio import diff as diff_module
            rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
        else:
            rendered = ''
+        if escape_output and rendered:
+            # Placemarkers (@removed_PLACEMARKER_OPEN etc) contain no HTML chars,
+            # so html_escape leaves them intact — they still get swapped to <span>
+            # tags later by apply_service_tweaks. See GHSA-q8xq-qg4x-wphg.
+            from markupsafe import escape as html_escape
+            rendered = str(html_escape(rendered))
        instance = super().__new__(cls, rendered)
        instance._prev = prev_snapshot
        instance._current = current_snapshot
        instance._base_kwargs = base_kwargs
+        instance._escape_output = escape_output
        return instance

    def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
@@ -140,6 +176,10 @@ class FormattableDiff(str):
        if lines is not None:
            result = '\n'.join(result.splitlines()[:int(lines)])

+        if self._escape_output and result:
+            from markupsafe import escape as html_escape
+            result = str(html_escape(result))
+
        return result


@@ -161,7 +201,11 @@ class NotificationContextData(dict):
            'diff_patch': FormattableDiff('', '', patch_format=True),
            'diff_removed': FormattableDiff('', '', include_added=False),
            'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
+            'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
+            'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
            'diff_url': None,
+            # Always the raw +/- diff regardless of LLM summary override (populated in handler.py from {{diff}})
+            'raw_diff': FormattableDiff('', ''),
            'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
            'notification_timestamp': time.time(),
            'prev_snapshot': None,
@@ -170,6 +214,8 @@ class NotificationContextData(dict):
            'timestamp_from': None,
            'timestamp_to': None,
            'triggered_text': None,
+            'llm_summary': None,     # AI plain-English summary of what changed (requires AI intent to be configured)
+            'llm_intent': None,      # The intent that was evaluated (watch-level or inherited from tag)
            'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX',  # Converted to 'watch_uuid' in create_notification_parameters
            'watch_mime_type': None,
            'watch_tag': None,
@@ -209,7 +255,7 @@ class NotificationContextData(dict):

        super().__setitem__(key, value)

-def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
+def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool, escape_output:bool=False):
    """
    Efficiently renders only the diff placeholders that are actually used in the notification text.

@@ -222,6 +268,9 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
        prev_snapshot: Previous version of content for diff comparison
        current_snapshot: Current version of content for diff comparison
        word_diff: Whether to use word-level (True) or line-level (False) diffing
+        escape_output: If True, the rendered diff output is HTML-escaped. Used for HTML-format
+            notifications so attacker-controlled page content can't inject live markup.
+            Both the cached str representation and the result of {{ diff(...) }} calls are escaped.

    Returns:
        dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
@@ -244,16 +293,27 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
        'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
    }

+    from changedetectionio.diff import extract_changed_from, extract_changed_to
+    extract_specs = {
+        'diff_changed_from': extract_changed_from,
+        'diff_changed_to':   extract_changed_to,
+    }
+
    ret = {}
    rendered_count = 0
-    # Only create FormattableDiff objects for diff keys actually used in the notification text
+    # Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
    for key in NotificationContextData().keys():
-        if key.startswith('diff') and key in diff_specs:
-            # Check if this placeholder is actually used in the notification text
-            pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
-            if re.search(pattern, notification_scan_text, re.IGNORECASE):
-                ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
-                rendered_count += 1
+        if not key.startswith('diff'):
+            continue
+        pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
+        if not re.search(pattern, notification_scan_text, re.IGNORECASE):
+            continue
+        if key in diff_specs:
+            ret[key] = FormattableDiff(prev_snapshot, current_snapshot, escape_output=escape_output, **diff_specs[key])
+            rendered_count += 1
+        elif key in extract_specs:
+            ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key], escape_output=escape_output)
+            rendered_count += 1

    if rendered_count:
        logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
@@ -375,6 +435,11 @@ class NotificationService:
        n_object['notification_body'] = _check_cascading_vars(self.datastore,'notification_body', watch)
        n_object['notification_format'] = _check_cascading_vars(self.datastore,'notification_format', watch)

+        # Attach LLM results so notification tokens render correctly
+        n_object['_llm_result'] = watch.get('_llm_result')
+        n_object['_llm_intent'] = watch.get('_llm_intent', '')
+        n_object['_llm_change_summary'] = watch.get('_llm_change_summary', '')
+
        # (Individual watch) Only prepare to notify if the rules above matched
        queued = False
        if n_object and n_object.get('notification_urls'):
@@ -414,7 +479,7 @@ Thanks - Your omniscient changedetection.io installation.
            'notification_body': body,
            'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
        })
-        n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
+        n_object['markup_text_links_to_html_links'] = (n_object.get('notification_format') or '').startswith('html')

        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']
@@ -461,9 +526,9 @@ Thanks - Your omniscient changedetection.io installation.
        n_object = NotificationContextData({
            'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
            'notification_body': body,
-            'notification_format': self._check_cascading_vars('notification_format', watch),
+            'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
        })
-        n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
+        n_object['markup_text_links_to_html_links'] = (n_object.get('notification_format') or '').startswith('html')

        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']
@@ -61,7 +61,7 @@ class ChangeDetectionSpec:
        pass

    @hookspec
-    def get_itemprop_availability_override(self, content, fetcher_name, fetcher_instance, url):
+    def get_itemprop_availability_override(self, content, fetcher_name, fetcher_instance, url, llm_intent=None):
        """Provide custom implementation of get_itemprop_availability for a specific fetcher.

        This hook allows plugins to provide their own product availability detection
@@ -73,6 +73,7 @@ class ChangeDetectionSpec:
            fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
            fetcher_instance: The fetcher instance that generated the content
            url: The URL being watched/checked
+            llm_intent: Optional user-supplied intent string (e.g. "alert when price drops below $300")

        Returns:
            dict or None: Dictionary with availability data:
@@ -174,6 +175,64 @@ class ChangeDetectionSpec:
        """
        pass

+    @hookspec
+    def get_html_head_extras():
+        """Return HTML to inject into the <head> of every page via base.html.
+
+        Plugins can use this to add <script>, <style>, or <link> tags that should
+        be present on all pages.  Return a raw HTML string or None.
+
+        IMPORTANT: Always use Flask's url_for() for any src/href URLs so that
+        sub-path deployments (nginx reverse proxy with USE_X_SETTINGS / X-Forwarded-Prefix)
+        work correctly.  This hook is called inside a request context so url_for() is
+        always available.
+
+        For small amounts of CSS/JS, return them inline — no file-serving needed::
+
+            from changedetectionio.pluggy_interface import hookimpl
+
+            @hookimpl
+            def get_html_head_extras(self):
+                return (
+                    '<style>.my-module-banner { color: red; }</style>\\n'
+                    '<script>console.log("my_module_content loaded");</script>'
+                )
+
+        For larger assets, register your own lightweight Flask routes in the plugin
+        module and point to them with url_for() so the sub-path prefix is handled
+        automatically::
+
+            from flask import url_for, Response
+            from changedetectionio.pluggy_interface import hookimpl
+            from changedetectionio.flask_app import app as _app
+
+            MY_CSS = ".my-module-example { color: red; }"
+            MY_JS  = "console.log('my_module_content loaded');"
+
+            @_app.route('/my_module_content/css')
+            def my_module_content_css():
+                return Response(MY_CSS, mimetype='text/css',
+                                headers={'Cache-Control': 'max-age=3600'})
+
+            @_app.route('/my_module_content/js')
+            def my_module_content_js():
+                return Response(MY_JS, mimetype='application/javascript',
+                                headers={'Cache-Control': 'max-age=3600'})
+
+            @hookimpl
+            def get_html_head_extras(self):
+                css = url_for('my_module_content_css')
+                js  = url_for('my_module_content_js')
+                return (
+                    f'<link rel="stylesheet" href="{css}">\\n'
+                    f'<script src="{js}" defer></script>'
+                )
+
+        Returns:
+            str or None: Raw HTML string to inject inside <head>, or None
+        """
+        pass
+

 # Set up Plugin Manager
 plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -183,24 +242,27 @@ plugin_manager.add_hookspecs(ChangeDetectionSpec)

 # Load plugins from subdirectories
 def load_plugins_from_directories():
-    # Dictionary of directories to scan for plugins
-    plugin_dirs = {
-        'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
-        # Add more plugin directories here as needed
-    }
-    
-    # Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory
-    
-    for dir_name, dir_path in plugin_dirs.items():
+    # List of (python_package_prefix, filesystem_path) pairs to scan for plugins.
+    # NOTE: processors/restock_diff/plugins is intentionally excluded here — those
+    # plugins are registered via register_builtin_restock_plugins() to avoid the
+    # circular import: restock_diff/__init__.py → model.Watch → content_fetchers → pluggy_interface.
+    plugin_dirs = [
+        (
+            'changedetectionio.conditions.plugins',
+            os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
+        ),
+    ]
+
+    for module_prefix, dir_path in plugin_dirs:
        if not os.path.exists(dir_path):
            continue
-            
+
        # Get all Python files (excluding __init__.py)
        for filename in os.listdir(dir_path):
            if filename.endswith(".py") and filename != "__init__.py":
                module_name = filename[:-3]  # Remove .py extension
-                module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"
-                
+                module_path = f"{module_prefix}.{module_name}"
+
                try:
                    module = importlib.import_module(module_path)
                    # Register the plugin with pluggy
@@ -237,23 +299,14 @@ def register_builtin_fetchers():
    This is called from content_fetchers/__init__.py after all fetchers are imported
    to avoid circular import issues.
    """
-    from changedetectionio.content_fetchers import requests, puppeteer, webdriver_selenium
-    from changedetectionio.content_fetchers.playwright import CDP, chrome, firefox, webkit
+    from changedetectionio.content_fetchers import requests, playwright, puppeteer, webdriver_selenium

+    # Register each built-in fetcher plugin
    if hasattr(requests, 'requests_plugin'):
        plugin_manager.register(requests.requests_plugin, 'builtin_requests')

-    if hasattr(CDP, 'cdp_plugin'):
-        plugin_manager.register(CDP.cdp_plugin, 'builtin_playwright_cdp')
-
-    if hasattr(chrome, 'chrome_plugin'):
-        plugin_manager.register(chrome.chrome_plugin, 'builtin_playwright_chrome')
-
-    if hasattr(firefox, 'firefox_plugin'):
-        plugin_manager.register(firefox.firefox_plugin, 'builtin_playwright_firefox')
-
-    if hasattr(webkit, 'webkit_plugin'):
-        plugin_manager.register(webkit.webkit_plugin, 'builtin_playwright_webkit')
+    if hasattr(playwright, 'playwright_plugin'):
+        plugin_manager.register(playwright.playwright_plugin, 'builtin_playwright')

    if hasattr(puppeteer, 'puppeteer_plugin'):
        plugin_manager.register(puppeteer.puppeteer_plugin, 'builtin_puppeteer')
@@ -261,6 +314,24 @@ def register_builtin_fetchers():
    if hasattr(webdriver_selenium, 'webdriver_selenium_plugin'):
        plugin_manager.register(webdriver_selenium.webdriver_selenium_plugin, 'builtin_webdriver_selenium')

+
+def register_builtin_restock_plugins():
+    """Register built-in restock processor plugins after all imports are complete.
+
+    Called from content_fetchers/__init__.py alongside register_builtin_fetchers()
+    to avoid the circular import that occurs when loading via load_plugins_from_directories()
+    (restock_diff/__init__.py → model.Watch → content_fetchers → pluggy_interface).
+    """
+    import importlib
+    module_path = 'changedetectionio.processors.restock_diff.plugins.llm_restock'
+    try:
+        module = importlib.import_module(module_path)
+        if not plugin_manager.is_registered(module):
+            plugin_manager.register(module, 'llm_restock')
+            logger.debug("Registered built-in restock plugin: llm_restock")
+    except Exception as e:
+        logger.error(f"Failed to register llm_restock plugin: {e}")
+
 # Helper function to collect UI stats extras from all plugins
 def collect_ui_edit_stats_extras(watch):
    """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
@@ -297,7 +368,7 @@ def collect_fetcher_status_icons(fetcher_name):

    return None

-def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instance, url):
+def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instance, url, llm_intent=None):
    """Get itemprop availability data from plugins as a fallback.

    This is called when the built-in get_itemprop_availability doesn't find good data.
@@ -307,6 +378,7 @@ def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instanc
        fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
        fetcher_instance: The fetcher instance that generated the content
        url: The URL being watched (watch.link - includes Jinja2 evaluation)
+        llm_intent: Optional user-supplied intent string passed through to plugins

    Returns:
        dict or None: Availability data dictionary from first matching plugin, or None
@@ -316,7 +388,8 @@ def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instanc
        content=content,
        fetcher_name=fetcher_name,
        fetcher_instance=fetcher_instance,
-        url=url
+        url=url,
+        llm_intent=llm_intent,
    )

    # Return first non-None result with actual data
@@ -369,28 +442,57 @@ def get_active_plugins():


 def get_fetcher_capabilities(watch, datastore):
-    """Get capability flags for a watch's resolved fetcher.
+    """Get capability flags for a watch's fetcher.

-    Uses the BrowserProfile resolution chain (watch → tag → global → built-in)
-    to determine the actual fetcher class, then reads its capability flags.
+    Args:
+        watch: The watch object/dict
+        datastore: The datastore to resolve 'system' fetcher

    Returns:
-        dict: {'supports_browser_steps': bool, 'supports_screenshots': bool,
-               'supports_xpath_element_data': bool}
+        dict: Dictionary with capability flags:
+            {
+                'supports_browser_steps': bool,
+                'supports_screenshots': bool,
+                'supports_xpath_element_data': bool
+            }
    """
-    from changedetectionio.model.browser_profile import resolve_browser_profile
+    # Get the fetcher name from watch
+    fetcher_name = watch.get('fetch_backend', 'system')
+
+    # Resolve 'system' to actual fetcher
+    if fetcher_name == 'system':
+        fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
+
+    # Get the fetcher class
    from changedetectionio import content_fetchers

-    profile = resolve_browser_profile(watch, datastore)
-    fetcher_class = content_fetchers.get_fetcher(profile.fetch_backend)
+    # Try to get from built-in fetchers first
+    if hasattr(content_fetchers, fetcher_name):
+        fetcher_class = getattr(content_fetchers, fetcher_name)
+        return {
+            'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
+            'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
+            'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
+        }

-    if fetcher_class is None:
-        return {'supports_browser_steps': False, 'supports_screenshots': False, 'supports_xpath_element_data': False}
+    # Try to get from plugin-provided fetchers
+    # Query all plugins for registered fetchers
+    plugin_fetchers = plugin_manager.hook.register_content_fetcher()
+    for fetcher_registration in plugin_fetchers:
+        if fetcher_registration:
+            name, fetcher_class = fetcher_registration
+            if name == fetcher_name:
+                return {
+                    'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
+                    'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
+                    'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
+                }

+    # Default: no capabilities
    return {
-        'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
-        'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
-        'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False),
+        'supports_browser_steps': False,
+        'supports_screenshots': False,
+        'supports_xpath_element_data': False
    }


@@ -586,4 +688,20 @@ def apply_update_finalize(update_handler, watch, datastore, processing_exception
    except Exception as e:
        # Don't let plugin errors crash the worker
        logger.error(f"Error in update_finalize hook: {e}")
-        logger.exception(f"update_finalize hook exception details:")
+        logger.exception(f"update_finalize hook exception details:")
+
+
+def collect_html_head_extras():
+    """Collect and combine HTML head extras from all plugins.
+
+    Called from a Flask template global so it always runs inside a request context.
+    This means url_for() works correctly in plugin implementations, including when the
+    app is deployed under a sub-path via USE_X_SETTINGS / X-Forwarded-Prefix (ProxyFix
+    sets SCRIPT_NAME so url_for() automatically prepends the prefix).
+
+    Returns:
+        str: Combined HTML string to inject inside <head>, or empty string
+    """
+    results = plugin_manager.hook.get_html_head_extras()
+    parts = [r for r in results if r]
+    return "\n".join(parts) if parts else ""
@@ -341,6 +341,18 @@ def get_processor_descriptions():
    return descriptions


+def wcag_text_color(hex_bg: str) -> str:
+    """Return #000000 or #ffffff for maximum WCAG contrast against hex_bg."""
+    hex_bg = hex_bg.lstrip('#')
+    if len(hex_bg) != 6:
+        return '#000000'
+    r, g, b = (int(hex_bg[i:i+2], 16) / 255 for i in (0, 2, 4))
+    def lin(c):
+        return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4
+    L = 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b)
+    return '#000000' if L > 0.179 else '#ffffff'
+
+
 def generate_processor_badge_colors(processor_name):
    """
    Generate consistent colors for a processor badge based on its name.
@@ -5,7 +5,7 @@ import hashlib
 from changedetectionio.browser_steps.browser_steps import browser_steps_get_valid_steps
 from changedetectionio.content_fetchers.base import Fetcher
 from changedetectionio.strtobool import strtobool
-from changedetectionio.validate_url import is_private_hostname
+from changedetectionio.validate_url import is_private_hostname, is_url_private_or_parser_confused
 from copy import deepcopy
 from abc import abstractmethod
 import os
@@ -23,7 +23,6 @@ class difference_detection_processor():
    watch = None
    xpath_data = None
    preferred_proxy = None
-    preferred_proxy_override = None   # Set externally to force a specific proxy (e.g. proxy checker)
    screenshot_format = SCREENSHOT_FORMAT_JPEG
    last_raw_content_checksum = None

@@ -37,8 +36,6 @@ class difference_detection_processor():
        # 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict()
        # 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data
        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
-        if self.watch is None:
-            raise KeyError(f"Watch UUID {watch_uuid} not found in datastore (deleted before processing?)")

        # Generic fetcher that should be extended (requests, playwright etc)
        self.fetcher = Fetcher()
@@ -100,7 +97,6 @@ class difference_detection_processor():
            logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
            self.last_raw_content_checksum = None

-
    async def validate_iana_url(self):
        """Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop.
        Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes
@@ -108,75 +104,92 @@ class difference_detection_processor():
        """
        if strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')):
            return
-        parsed = urlparse(self.watch.link)
-        if not parsed.hostname:
-            return
        loop = asyncio.get_running_loop()
-        if await loop.run_in_executor(None, is_private_hostname, parsed.hostname):
+        # Use the parser-agnostic check so urlparse/urllib3 differentials (GHSA-rph4-96w6-q594)
+        # can't slip a private/internal hostname past this pre-flight gate.
+        if await loop.run_in_executor(None, is_url_private_or_parser_confused, self.watch.link):
            raise Exception(
-                f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. "
+                f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address "
+                f"or contains a parser-differential payload. "
                f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
            )

-    async def call_browser(self):
+    async def call_browser(self, preferred_proxy_id=None):

        from requests.structures import CaseInsensitiveDict
-        from changedetectionio.model.browser_profile import resolve_browser_profile, BUILTIN_REQUESTS

        url = self.watch.link

-        # Protect against file:, file:/, file:// access
+        # Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
        if re.search(r'^file:', url.strip(), re.IGNORECASE):
            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
-                raise Exception("file:// type access is denied for security reasons.")
+                raise Exception(
+                    "file:// type access is denied for security reasons."
+                )

        await self.validate_iana_url()

-        # Resolve the full browser profile for this watch (watch → tag → global → built-in)
-        profile = resolve_browser_profile(self.watch, self.datastore)
+        # Requests, playwright, other browser via wss:// etc, fetch_extra_something
+        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

-        # PDFs always use the requests fetcher — browsers render them in an embedded viewer
+        # Proxy ID "key"
+        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(
+            uuid=self.watch.get('uuid'))
+
+        # Pluggable content self.fetcher
+        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
+            prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')
+
+        # In the case that the preferred fetcher was a browser config with custom connection URL..
+        # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
+        custom_browser_connection_url = None
+        if prefer_fetch_backend.startswith('extra_browser_'):
+            (t, key) = prefer_fetch_backend.split('extra_browser_')
+            connection = list(
+                filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
+            if connection:
+                prefer_fetch_backend = 'html_webdriver'
+                custom_browser_connection_url = connection[0].get('browser_connection_url')
+
+        # PDFs should use html_requests because playwright will (so far) serve them up in an embedded page
         # @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
+        # @todo needs a test or a fix
        if self.watch.is_pdf:
-            profile = BUILTIN_REQUESTS
+            prefer_fetch_backend = "html_requests"

-        # Resolve proxy for the target URL fetch.
-        # Note: browser_connection_url is the WebSocket endpoint to reach the remote browser,
-        # which is separate from the proxy used by the browser to fetch target pages.
-        proxy_url = self.datastore.get_proxy_url_for_watch(self.watch.get('uuid'), override_id=self.preferred_proxy_override)
-        if proxy_url:
-            logger.debug(f"Proxy '{proxy_url}' for {url}")
-
-        logger.debug(f"BrowserProfile '{profile.get_machine_name()}' (fetcher={profile.fetch_backend}) for watch {self.watch['uuid']}")
-
-        # Select the fetcher class
+        # Grab the right kind of 'fetcher', (playwright, requests, etc)
        from changedetectionio import content_fetchers
-        fetcher_class_name = profile.get_fetcher_class_name()
+        if hasattr(content_fetchers, prefer_fetch_backend):
+            # @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
+            if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
+                # This is never supported in selenium anyway
+                logger.warning(
+                    "Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
+                from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
+                fetcher_obj = playwright_fetcher
+            else:
+                fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
+        else:
+            # What it referenced doesnt exist, Just use a default
+            fetcher_obj = getattr(content_fetchers, "html_requests")

-        fetcher_obj = content_fetchers.get_fetcher(fetcher_class_name)
-        if fetcher_obj is None:
-            logger.warning(f"Fetcher '{fetcher_class_name}' not found, falling back to requests")
-            fetcher_obj = content_fetchers.get_fetcher('requests')
-        elif self.watch.has_browser_steps and not getattr(fetcher_obj, 'supports_browser_steps', False):
-            # Browser steps require Playwright — override if the resolved fetcher doesn't support them
-            logger.warning(f"Fetcher '{fetcher_class_name}' does not support browser steps, overriding to Playwright")
-            fetcher_obj = content_fetchers.get_fetcher('playwright')
+        proxy_url = None
+        if preferred_proxy_id:
+            # Custom browser endpoints should NOT have a proxy added
+            if not prefer_fetch_backend.startswith('extra_browser_'):
+                proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
+                logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
+            else:
+                logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")

-        self.fetcher = fetcher_obj(
-            proxy_override=proxy_url,
-            custom_browser_connection_url=profile.browser_connection_url,
-            screenshot_format=self.screenshot_format,
-            # BrowserProfile fields — browser fetchers use these; html_requests ignores them
-            viewport_width=profile.viewport_width,
-            viewport_height=profile.viewport_height,
-            block_images=profile.block_images,
-            block_fonts=profile.block_fonts,
-            profile_user_agent=profile.user_agent,
-            ignore_https_errors=profile.ignore_https_errors,
-            locale=profile.locale,
-            service_workers=profile.service_workers,
-            extra_delay=profile.extra_delay,
-        )
+        logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")
+
+        # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
+        # When browser_connection_url is None, the fetcher should fall back to working out the best defaults itself (OS env vars etc)
+        self.fetcher = fetcher_obj(proxy_override=proxy_url,
+                                   custom_browser_connection_url=custom_browser_connection_url,
+                                   screenshot_format=self.screenshot_format
+                                   )

        if self.watch.has_browser_steps:
            self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', []))
@@ -186,17 +199,9 @@ class difference_detection_processor():
        from changedetectionio.jinja2_custom import render as jinja_render
        request_headers = CaseInsensitiveDict()

-        # Browser profile: UA override (lowest priority — watch headers override this)
-        if profile.user_agent:
-            request_headers['User-Agent'] = profile.user_agent
-
-        # Browser profile: custom headers (override profile UA, but watch headers override these)
-        if profile.custom_headers:
-            for line in profile.custom_headers.splitlines():
-                line = line.strip()
-                if not line.startswith('#') and ':' in line:
-                    k, v = line.split(':', 1)
-                    request_headers[k.strip()] = v.strip()
+        ua = self.datastore.data['settings']['requests'].get('default_ua')
+        if ua and ua.get(prefer_fetch_backend):
+            request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})

        request_headers.update(self.watch.get('headers', {}))
        request_headers.update(self.datastore.get_all_base_headers())
@@ -253,7 +258,6 @@ class difference_detection_processor():

        # @todo .quit here could go on close object, so we can run JS if change-detected
        await self.fetcher.quit(watch=self.watch)
-        self.fetcher.disk_cleanup_after_fetch()

        # Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
        # content that gets decoded into surrogate characters (e.g. \udcad). Without this,
@@ -42,7 +42,7 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
    # Get error information for the template
    screenshot_url = watch.get_screenshot()

-    fetcher_supports_screenshots = watch.fetcher_supports_screenshots
+    is_html_webdriver = watch.fetcher_supports_screenshots

    password_enabled_and_share_is_off = False
    if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
@@ -59,9 +59,9 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
        last_error_screenshot=watch.get_error_snapshot(),
        last_error_text=watch.get_error_text(),
        screenshot=screenshot_url,
-        fetcher_supports_screenshots=fetcher_supports_screenshots,
+        is_html_webdriver=is_html_webdriver,
        password_enabled_and_share_is_off=password_enabled_and_share_is_off,
-        extra_title=f" - {watch.label} - Extract Data",
+        extra_title=f" - {watch.label} - {gettext('Extract Data')}",
        extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
        pure_menu_fixed=False
    )
@@ -45,7 +45,7 @@ class Restock(dict):
            'in_stock': None,
            'price': None,
            'currency': None,
-            'original_price': None
+            'last_price': None  # Price recorded at the most recent check (was misleadingly named 'original_price')
        }

        # Initialize the dictionary with default values
@@ -59,8 +59,8 @@ class Restock(dict):
                raise ValueError("Only one positional argument of type 'dict' is allowed")

    def __setitem__(self, key, value):
-        # Custom logic to handle setting price and original_price
-        if key == 'price' or key == 'original_price':
+        # Custom logic to handle setting price and last_price
+        if key == 'price' or key == 'last_price':
            if isinstance(value, str):
                value = self.parse_currency(raw_value=value)

@@ -89,7 +89,8 @@ class Watch(BaseWatch):

    def extra_notification_token_values(self):
        values = super().extra_notification_token_values()
-        values['restock'] = self.get('restock', {})
+        # Copy so the derived 'previous_price' token added below doesn't mutate the stored restock object
+        values['restock'] = dict(self.get('restock', {}))

        values['restock']['previous_price'] = None
        if self.history_n >= 2:
@@ -109,7 +110,7 @@ class Watch(BaseWatch):

        values.append(('restock.price', "Price detected"))
        values.append(('restock.in_stock', "In stock status"))
-        values.append(('restock.original_price', "Original price at first check"))
+        values.append(('restock.last_price', "Price at the previous check"))
        values.append(('restock.previous_price', "Previous price in history"))

        return values
@@ -22,7 +22,7 @@ class RestockSettingsForm(Form):
                                  render_kw={"placeholder": _l("No limit"), "size": "10"})
    price_change_max = FloatField(_l('Above price to trigger notification'), [validators.Optional()],
                                  render_kw={"placeholder": _l("No limit"), "size": "10"})
-    price_change_threshold_percent = FloatField(_l('Threshold in %% for price changes since the original price'), validators=[
+    price_change_threshold_percent = FloatField(_l('Threshold (%) for price changes since the previous check'), validators=[

        validators.Optional(),
        validators.NumberRange(min=0, max=100, message=_l("Should be between 0 and 100")),
@@ -73,8 +73,8 @@ class processor_settings_form(processor_text_json_diff_form):
                </fieldset>
                <fieldset class="pure-group price-change-minmax">
                    {{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
-                    <span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
-                    <span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
+                    <span class="pure-form-message-inline">Price must change more than this % since the previous check to trigger a change.</span><br>
+                    <span class="pure-form-message-inline">For example, if the previous check saw the product at $1,000 USD, <strong>2%</strong> would mean it has to change more than $20 since then.</span><br>
                </fieldset>
            </div>
        </fieldset>
@@ -0,0 +1,300 @@
+"""
+LLM fallback plugin for price and restock info extraction.
+
+When the built-in structured-metadata extraction (JSON-LD, microdata, OpenGraph)
+fails to produce both a price and availability, this plugin is called as a last
+resort.  It sends a trimmed, HTML-stripped version of the page to the configured
+LLM and asks it to return a structured JSON answer.
+
+The module-level `datastore` variable is injected at startup by
+`inject_datastore_into_plugins()` in pluggy_interface.py.
+"""
+import json
+import re
+from loguru import logger
+from changedetectionio.pluggy_interface import hookimpl
+from changedetectionio.llm.evaluator import apply_local_token_multiplier
+
+# Injected at startup by inject_datastore_into_plugins()
+datastore = None
+
+# System message sent with every fallback request (see the `messages` list built in
+# get_itemprop_availability_override()).
+# NOTE: the rule headings below talk about "in stock" / "out of stock", but the reply
+# contract at the end of the prompt requires the exact tokens "instock" / "outofstock" / null.
+# The caller budgets max_tokens=80 (before any local-model scaling) for the reply, so the
+# requested JSON answer has to stay this small.
+SYSTEM_PROMPT = (
+    'You are an expert price and restock extraction utility. '
+    'Your task is to analyse a product page and determine the price and stock status of the MAIN product only.\n\n'
+
+    'AVAILABILITY — treat as "in stock":\n'
+    '- Action buttons near the product: "Add to cart", "Add to basket", "Buy now", '
+    '"Order now", "Purchase", "Import", "Add to bag", "Add to trolley", "In stock", '
+    '"Available", "Ships in X days/weeks", "In store", "Pick up today".\n'
+    '- "Pre-order" or "Reserve" — the item is orderable, treat as "in stock".\n'
+    '- "Only X left", "Almost gone", "Low stock", "Limited availability" — still in stock.\n'
+    '- "Request a quote" or "Contact us for pricing" — item is available, price is null.\n'
+    '- IMPORTANT: Ignore cart/basket/bag links in the page HEADER or navigation bar '
+    '(e.g. a shopping cart icon showing item count). That reflects what is already in '
+    'the visitor\'s cart — it says nothing about whether THIS product is available.\n\n'
+
+    'PRICE — what NOT to use:\n'
+    '- A "$0.00" or "0" that appears near header/nav links such as "Login", "Wishlist", '
+    '"Contact Us", "My Account" is an empty shopping-cart indicator, NOT the product price. '
+    'Ignore it entirely — return null for price rather than 0 in this situation.\n'
+    '- Only return 0 (free) when the page clearly states the product itself costs nothing '
+    '(e.g. "Free", "Free download", "Price: $0").\n\n'
+
+    'AVAILABILITY — treat as "out of stock":\n'
+    '- "Out of stock", "Sold out", "Unavailable", "Currently unavailable", '
+    '"Temporarily out of stock", "Discontinued", "No longer available", '
+    '"Notify me when available", "Email me when back", "Join waitlist".\n\n'
+
+    'AVAILABILITY — return null when uncertain:\n'
+    '- The page asks the user to select a size, colour, or other variant first '
+    '("Select an option", "Choose a size") — availability depends on the variant, so return null.\n'
+    '- You cannot clearly tell from the page content whether the item is available.\n\n'
+
+    'PRICE rules:\n'
+    '- Extract the main selling price as a plain number, no currency symbol.\n'
+    '- Prices may use any popular locale format — interpret them all correctly and return a plain decimal number. '
+    'Examples: "10 000 Kč" = 10000, "1.299,95 €" = 1299.95, "1,299.95" = 1299.95, '
+    '"10 000,50" = 10000.50, "£1.299" = 1299, "¥10000" = 10000.\n'
+    '- If both an original (crossed-out) price and a sale/current price appear, use the sale price.\n'
+    '- "From $X" or "Starting at $X" are teaser prices — prefer a definite price or return null.\n'
+    '- A price of 0 (free) is valid — return 0, not null.\n'
+    '- If pricing requires a quote or login, return null for price.\n'
+    '- Ignore prices shown in search/filter UI elements (e.g. "Price from: — to:").\n'
+    '- IMPORTANT: Ignore ALL prices that appear inside or below recommendation/discovery blocks '
+    'such as: "Similar items", "You may also like", "Customers also bought", '
+    '"Based on your browsing", "Based on your shopping", "Frequently bought together", '
+    '"People also viewed", "Related products", "Sponsored products", "More like this", '
+    '"Other sellers", "Compare with similar items". '
+    'These sections contain prices for OTHER products, not the main product.\n'
+    '- When multiple prices appear on the page, prefer the price that is positioned '
+    'earliest/highest in the page content — it is almost always the main product price. '
+    'Prices appearing after large blocks of descriptive text or review sections are '
+    'likely from recommendation widgets and should be ignored.\n\n'
+
+    'CLASSIFIEDS AND LISTING PAGES:\n'
+    '- On classifieds or marketplace sites (e.g. eBay listings, Craigslist, Bazoš, Gumtree), '
+    'if a price is shown alongside seller contact details or a "Contact seller" link, '
+    'treat the item as "instock" — the listing being active means it is available.\n\n'
+
+    'Return ONLY a JSON object with exactly these three keys:\n'
+    '  "price"        — number or null\n'
+    '  "currency"     — ISO-4217 code (USD, EUR, GBP …) or null\n'
+    '  "availability" — exactly one of: "instock", "outofstock", or null\n'
+    '                   Use "instock" when the product can be ordered/purchased.\n'
+    '                   Use "outofstock" when it cannot.\n'
+    '                   Use null when you genuinely cannot tell.\n'
+    'No markdown, no backticks, no explanation — pure JSON only.'
+)
+
+# Hard cap on the number of characters of page text that _strip_html() returns for the LLM.
+# When a JSON-LD prefix is present it is counted against this same budget.
+_MAX_CONTENT_CHARS = 8_000
+
+
+def _extract_jsonld(html_content: str) -> str:
+    """Collect the bodies of all ``application/ld+json`` script tags.
+
+    The bodies are whitespace-trimmed, joined with single spaces and the
+    result is cut to at most 2000 characters.  Returns an empty string when
+    the page has no JSON-LD script tags.
+    """
+    matches = re.finditer(
+        r'<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>',
+        html_content, flags=re.DOTALL | re.IGNORECASE
+    )
+    payloads = [m.group(1).strip() for m in matches]
+    # Joining an empty list yields '', so the "no blocks" case needs no special branch
+    return ' '.join(payloads)[:2000]
+
+
+# Semantic tags always treated as chrome (nav/header/footer)
+# _remove_chrome() decomposes every element whose tag name is in this set.
+_CHROME_TAGS = {'nav', 'header', 'footer', 'aside'}
+
+# id/class fragments that strongly indicate navigation or site-chrome
+# Searched case-insensitively in "<class names> <id>" of each element by _remove_chrome().
+# The \b anchors treat '-' as a word boundary, so a fragment can match inside a hyphenated
+# name (e.g. class "product-related-items" matches on "related") but not inside a longer
+# word (e.g. "navigate" does not match "nav").
+_CHROME_PATTERNS = re.compile(
+    r'\b(nav|navigation|navbar|menu|mega-menu|breadcrumb|breadcrumbs?|'
+    r'site-header|page-header|top-bar|top-nav|top-header|mobile-nav|header-bar|'
+    r'site-footer|page-footer|footer-links|related|similar|'
+    r'you-?may-?also|customers?-?also|frequently-?bought|'
+    r'people-?also|sponsored|recommendation|widget|sidebar|'
+    r'cross-?sell|up-?sell)\b',
+    re.IGNORECASE,
+)
+
+
+def _remove_chrome(html_content: str) -> str:
+    """Use BS4 to strip navigation, header, footer and recommendation noise.
+
+    Uses html.parser (built-in, no lxml) to avoid memory leak issues.
+    Falls back to the original HTML string if BS4 fails for any reason.
+
+    Args:
+        html_content: Raw HTML of the fetched page.
+
+    Returns:
+        The re-serialised HTML with every element removed whose tag name is in
+        ``_CHROME_TAGS`` or whose class/id text matches ``_CHROME_PATTERNS``;
+        or ``html_content`` unchanged when parsing or filtering raised.
+    """
+    try:
+        # Imported lazily inside the try so a missing/broken bs4 also takes the fallback path
+        from bs4 import BeautifulSoup, Tag
+        soup = BeautifulSoup(html_content, 'html.parser')
+
+        # Snapshot the full tag list before any decompositions so we don't
+        # mutate the tree while iterating it.  After a parent is decomposed
+        # its children become orphans (parent=None) — skip those.
+        for tag in list(soup.find_all(True)):
+            if not isinstance(tag, Tag) or tag.parent is None:
+                continue
+            name = tag.name or ''
+            # Rule 1: semantic chrome tags are removed regardless of their attributes
+            if name in _CHROME_TAGS:
+                tag.decompose()
+                continue
+            try:
+                # 'class' may come back as a list of names or as a plain value; flatten to one string
+                cls_list = tag.get('class') or []
+                cls_str = ' '.join(cls_list) if isinstance(cls_list, list) else str(cls_list)
+                id_str = tag.get('id') or ''
+            except Exception:
+                # Attribute access failed for this element — leave it in place and move on
+                continue
+            # Rule 2: class/id text that looks like navigation or recommendation chrome
+            if _CHROME_PATTERNS.search(cls_str + ' ' + id_str):
+                tag.decompose()
+
+        return str(soup)
+    except Exception as e:
+        logger.debug(f"BS4 chrome removal failed ({e}), using raw HTML")
+        return html_content
+
+
+def _strip_html(html_content: str) -> str:
+    """HTML-to-text for LLM consumption.
+
+    1. Extracts JSON-LD (structured product data) to prepend.
+    2. Strips nav/header/footer/recommendation blocks via BS4.
+    3. Removes all remaining tags, decodes HTML entities and collapses whitespace.
+    JSON-LD is prepended so reliable price/availability data is always visible
+    to the LLM regardless of how deep it sits in the page.
+
+    Args:
+        html_content: Raw HTML of the fetched page.
+
+    Returns:
+        Plain text of at most ``_MAX_CONTENT_CHARS`` characters; when JSON-LD
+        was found it comes first and the page text fills the remaining budget.
+    """
+    # Function-scope import keeps this block self-contained
+    from html import unescape
+
+    jsonld = _extract_jsonld(html_content)
+
+    # Remove site-chrome before generic tag stripping
+    cleaned = _remove_chrome(html_content)
+
+    # Drop HTML comments (can contain large disabled markup blocks)
+    text = re.sub(r'<!--.*?-->', ' ', cleaned, flags=re.DOTALL)
+    # Drop all <script> and <style> blocks
+    text = re.sub(r'<(script|style)[^>]*>.*?</(script|style)>', ' ', text, flags=re.DOTALL | re.IGNORECASE)
+    # Strip remaining tags
+    text = re.sub(r'<[^>]+>', ' ', text)
+    # Decode entities in a single pass.  Chained str.replace() calls that handle '&amp;'
+    # first double-decode input such as '&amp;lt;' into '<', and they miss numeric and
+    # named entities like '&#8364;' / '&euro;' / '&pound;' that carry the currency symbol.
+    text = unescape(text)
+    # '&nbsp;' decodes to U+00A0, which \s matches, so it is collapsed to a plain space here
+    text = re.sub(r'\s+', ' ', text).strip()
+
+    if jsonld:
+        # Reserve room for the JSON-LD prefix plus the joining space
+        budget = _MAX_CONTENT_CHARS - len(jsonld) - 1
+        return (jsonld + ' ' + text[:budget]).strip()
+    return text[:_MAX_CONTENT_CHARS]
+
+
+@hookimpl
+def get_itemprop_availability_override(content, fetcher_name, fetcher_instance, url, llm_intent=None):
+    """Use an LLM as a last-resort fallback for price and restock extraction.
+
+    Args:
+        content: Raw HTML of the fetched page; it is reduced to text with _strip_html().
+        fetcher_name: Part of the hook signature; not read by this implementation.
+        fetcher_instance: Part of the hook signature; not read by this implementation.
+        url: Page URL, included in the user prompt and in log messages.
+        llm_intent: Optional text appended to the user prompt as
+            "User notification intent" when truthy.
+
+    Returns:
+        A dict with 'price' (float or None), 'currency', 'availability' plus the
+        bookkeeping keys '_tokens', '_input_tokens', '_output_tokens' and '_model',
+        or None when the fallback is skipped (no datastore, LLM modules not importable,
+        setting disabled, no model configured, empty page text), when the reply has
+        neither a usable price nor an availability, or when the call/parsing fails.
+        No exception from the LLM call or JSON parsing propagates to the caller.
+    """
+    global datastore
+
+    if datastore is None:
+        logger.debug("LLM restock fallback: no datastore injected yet, skipping")
+        return None
+
+    # Imported lazily so a missing LLM stack disables the fallback instead of breaking the plugin
+    try:
+        from changedetectionio.llm.evaluator import _runtime_llm_config, accumulate_global_tokens, get_llm_settings
+        from changedetectionio.llm import client as llm_client
+    except ImportError as e:
+        logger.debug(f"LLM restock fallback: LLM libraries not available ({e})")
+        return None
+
+    # Gate on the user setting (default True — enabled out of the box)
+    if not get_llm_settings(datastore).restock_use_fallback_extract:
+        logger.debug("LLM restock fallback: disabled in settings")
+        return None
+
+    # _runtime_llm_config returns None (with a debug log) when the master 'llm_enabled'
+    # toggle is off, so this path is gated for free.
+    llm_cfg = _runtime_llm_config(datastore)
+    if not llm_cfg or not llm_cfg.get('model'):
+        logger.debug("LLM restock fallback: no LLM model configured or LLM disabled, skipping")
+        return None
+
+    text_content = _strip_html(content) if content else ''
+    logger.debug(f"LLM restock fallback: stripped HTML to {len(text_content)} chars for {url}")
+    if not text_content.strip():
+        logger.debug("LLM restock fallback: no text content after stripping HTML")
+        return None
+
+    logger.info(f"LLM restock fallback: using LLM ({llm_cfg['model']}) for price/stock extraction - {url}")
+
+    user_prompt = f'URL: {url or "unknown"}\n\nPage content:\n{text_content}'
+    if llm_intent:
+        user_prompt += f'\n\nUser notification intent: {llm_intent}'
+
+    try:
+        raw, tokens, input_tokens, output_tokens = llm_client.completion(
+            model=llm_cfg['model'],
+            messages=[
+                {'role': 'system', 'content': SYSTEM_PROMPT},
+                {'role': 'user', 'content': user_prompt},
+            ],
+            api_key=llm_cfg.get('api_key'),
+            api_base=llm_cfg.get('api_base'),
+            # 80 fits a {price, currency, availability} JSON answer comfortably for cloud
+            # models. Local reasoning models burn most of that on chain-of-thought before
+            # the JSON lands — the multiplier scales it up only when provider_kind says so.
+            max_tokens=apply_local_token_multiplier(80, llm_cfg),
+        )
+
+        # Token usage is recorded before parsing, so it is counted even if the reply is unusable
+        accumulate_global_tokens(
+            datastore, tokens,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            model=llm_cfg['model'],
+        )
+
+        # Strip optional markdown fences the model might add
+        raw = raw.strip()
+        if raw.startswith('```'):
+            raw = re.sub(r'^```[a-z]*\n?', '', raw)
+            raw = raw.rstrip('`').strip()
+
+        logger.debug(f"LLM restock fallback raw response: {raw!r}")
+
+        result = json.loads(raw)
+
+        price = result.get('price')
+        # Empty strings are folded to None for currency and availability
+        currency = result.get('currency') or None
+        availability = result.get('availability') or None
+
+        # Normalise price to float
+        if price is not None:
+            try:
+                if isinstance(price, str):
+                    # Keeps only digits and '.', so the string is expected to already be a plain
+                    # decimal as the prompt requests (thousands separators/commas are simply dropped)
+                    price = float(re.sub(r'[^\d.]', '', price))
+                else:
+                    price = float(price)
+            except (ValueError, TypeError):
+                logger.warning(f"LLM restock fallback: could not convert price {price!r} to float, ignoring")
+                price = None
+
+        if price is None and not availability:
+            logger.info(f"LLM restock fallback: LLM returned no usable price or availability for {url} (raw: {raw!r})")
+            return None
+
+        logger.info(
+            f"LLM restock fallback result: price={price} currency={currency} "
+            f"availability={availability!r} url={url}"
+        )
+        # Underscore-prefixed keys carry token accounting alongside the extracted data
+        return {
+            'price': price,
+            'currency': currency,
+            'availability': availability,
+            '_tokens': tokens,
+            '_input_tokens': input_tokens,
+            '_output_tokens': output_tokens,
+            '_model': llm_cfg['model'],
+        }
+
+    except json.JSONDecodeError as e:
+        logger.warning(f"LLM restock fallback: JSON parse failed ({e}) - raw response was: {raw!r}")
+        return None
+    except Exception as e:
+        logger.warning(f"LLM restock fallback: extraction failed for {url}: {e}")
+        return None
@@ -486,19 +486,42 @@ class perform_site_check(difference_detection_processor):
        has_price = itemprop_availability.get('price') is not None
        has_availability = itemprop_availability.get('availability') is not None

-        # @TODO !!! some setting like "Use as fallback" or "always use", "t
-        if not (has_price and has_availability) or True:
+        if not (has_price and has_availability):
            from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
-            # Use the actual resolved fetcher name from the fetcher instance
-            fetcher_name = self.watch.effective_browser_profile.fetch_backend
-            logger.debug(f"Resolved effective fetcher: {fetcher_name}")
+            fetcher_name = watch.get('fetch_backend', 'html_requests')
+
+            # Resolve 'system' to the actual fetcher being used
+            # This allows plugins to work even when watch uses "system settings default"
+            if fetcher_name == 'system':
+                # Get the actual fetcher that was used (from self.fetcher)
+                # Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver')
+                actual_fetcher = type(self.fetcher).__name__
+                if 'html_requests' in actual_fetcher.lower():
+                    fetcher_name = 'html_requests'
+                elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower():
+                    fetcher_name = 'html_webdriver'
+                logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}")

            # Try plugin override - plugins can decide if they support this fetcher
            if fetcher_name:
                logger.debug(f"Calling extra plugins for getting item price/availability (fetcher: {fetcher_name})")
-                plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)
+                from changedetectionio.llm.evaluator import resolve_intent
+                _llm_intent, _ = resolve_intent(watch, self.datastore)
+                plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link, llm_intent=_llm_intent or None)

                if plugin_availability:
+                    # Extract and strip LLM token metadata before using as Restock data
+                    _plugin_tokens = plugin_availability.pop('_tokens', 0)
+                    _plugin_input_tokens = plugin_availability.pop('_input_tokens', 0)
+                    _plugin_output_tokens = plugin_availability.pop('_output_tokens', 0)
+                    _plugin_model = plugin_availability.pop('_model', '')
+
+                    # Update per-watch token counters directly on the watch (same
+                    # pattern as evaluator.py) so they're committed when update_watch runs
+                    if _plugin_tokens:
+                        watch['llm_last_tokens_used'] = _plugin_tokens
+                        watch['llm_tokens_used_cumulative'] = (watch.get('llm_tokens_used_cumulative') or 0) + _plugin_tokens
+
                    # Plugin provided better data, use it
                    plugin_has_price = plugin_availability.get('price') is not None
                    plugin_has_availability = plugin_availability.get('availability') is not None
@@ -541,10 +564,15 @@ class perform_site_check(difference_detection_processor):
        # Main detection method
        fetched_md5 = None

-        # store original price if not set
-        if itemprop_availability and itemprop_availability.get('price') and not itemprop_availability.get('original_price'):
-            itemprop_availability['original_price'] = itemprop_availability.get('price')
-            update_obj['restock']["original_price"] = itemprop_availability.get('price')
+        # Record this check's price as 'last_price'. The freshly scraped itemprop never carries
+        # last_price, so this is (re)set on every check - i.e. last_price always holds the price
+        # from the most recent check, and at comparison time below it is the PREVIOUS check's price.
+        if itemprop_availability and itemprop_availability.get('price') and not itemprop_availability.get('last_price'):
+            itemprop_availability['last_price'] = itemprop_availability.get('price')
+            update_obj['restock']["last_price"] = itemprop_availability.get('price')
+            logger.debug(
+                f"{watch.get('uuid')} Updating price - setting 'last_price' to '{itemprop_availability.get('price')}' "
+                f"(previously stored 'last_price' was '{(watch.get('restock') or {}).get('last_price')}'). ")

        if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
            raise ProcessorException(
@@ -594,9 +622,13 @@ class perform_site_check(difference_detection_processor):

        if restock_settings.get('follow_price_changes') and watch.get('restock') and update_obj.get('restock') and update_obj['restock'].get('price'):
            price = float(update_obj['restock'].get('price'))
-            # Default to current price if no previous price found
-            if watch['restock'].get('original_price'):
-                previous_price = float(watch['restock'].get('original_price'))
+            # Compare against last_price (the price from the previous check)
+            if watch['restock'].get('last_price'):
+                previous_price = float(watch['restock'].get('last_price'))
+                logger.debug(
+                    f"{watch.get('uuid')} Comparing NEW price '{price}' against stored 'last_price' '{previous_price}' "
+                    f"(watch's stored current price was '{(watch.get('restock') or {}).get('price')}') -> "
+                    f"price {'CHANGED' if price != previous_price else 'unchanged'}")
                # It was different, but negate it further down
                if price != previous_price:
                    changed_detected = True
@@ -619,11 +651,14 @@ class perform_site_check(difference_detection_processor):
                        else:
                            logger.trace(f"{watch.get('uuid')} {price} is between {min_limit} and {max_limit}, continuing normal comparison")

-                    # Price comparison by %
-                    if watch['restock'].get('original_price') and changed_detected and restock_settings.get('price_change_threshold_percent'):
-                        previous_price = float(watch['restock'].get('original_price'))
+                    # Price comparison by % - against last_price (the previous check's price)
+                    if watch['restock'].get('last_price') and changed_detected and restock_settings.get('price_change_threshold_percent'):
+                        previous_price = float(watch['restock'].get('last_price'))
                        pc = float(restock_settings.get('price_change_threshold_percent'))
                        change = abs((price - previous_price) / previous_price * 100)
+                        logger.debug(
+                            f"{watch.get('uuid')} % threshold check - comparing NEW price '{price}' against stored "
+                            f"'last_price' '{previous_price}' = {change:.3f}% change (threshold {pc}%)")
                        if change and change <= pc:
                            logger.debug(f"{watch.get('uuid')} Override change-detected to FALSE because % threshold ({pc}%) was {change:.3f}%")
                            changed_detected = False
@@ -3,11 +3,11 @@
 {% block content %}
    <div class="tabs">
    <ul>
-        {% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">Error Text</a></li> {% endif %}
-        {% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">Error Screenshot</a></li> {% endif %}
-        <li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">Text</a></li>
-        <li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">Screenshot</a></li>
-        <li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">Extract Data</a></li>
+        {% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">{{ _('Error Text') }}</a></li> {% endif %}
+        {% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">{{ _('Error Screenshot') }}</a></li> {% endif %}
+        <li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">{{ _('Text') }}</a></li>
+        <li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">{{ _('Screenshot') }}</a></li>
+        <li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">{{ _('Extract Data') }}</a></li>
    </ul>
 </div>

@@ -17,23 +17,23 @@
        <form id="extract-data-form" class="pure-form pure-form-stacked edit-form"  action="{{ url_for('ui.ui_diff.diff_history_page_extract_POST', uuid=uuid) }}"  method="POST">
            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">

-            <p>This tool will extract text data from all of the watch history.</p>
+            <p>{{ _('This tool will extract text data from all of the watch history.') }}</p>

            <div class="pure-control-group">
                {{ render_field(extract_form.extract_regex) }}
                <span class="pure-form-message-inline">
-                    A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>
+                    {{ _('A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.')|safe }}<br>

                    <p>
-                        For example, to extract only the numbers from text &dash;<br>
-                        <strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
-                        <strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
+                        {{ _('For example, to extract only the numbers from text') }} &dash;<br>
+                        <strong>{{ _('Raw text') }}</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
+                        <strong>{{ _('RegEx to extract:') }}</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
                    </p>
                    <p>
-                        <a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
+                        <a href="https://RegExr.com/">{{ _('Be sure to test your RegEx here.') }}</a>
                    </p>
                    <p>
-                        Each RegEx group bracket <code>()</code> will be in its own column, the first column value is always the date.
+                        {{ _('Each RegEx group bracket') }} <code>()</code> {{ _('will be in its own column, the first column value is always the date.') }}
                    </p>
                </span>
            </div>
@@ -35,6 +35,50 @@ def _task(watch, update_handler):
    return text_after_filter


+def _compute_ignore_line_numbers_for_preview(text_pre_extract, ignore_patterns, extract_patterns):
+    """Map ignore_text hits from the pre-extract text onto the post-extract preview.
+
+    Returns the 1-indexed line numbers, as shown in the post-extract display, of
+    the output lines that were produced by input lines matching ignore_patterns.
+
+    extract_text (#4138) can rewrite a line's content - e.g. "0.54.10" becomes
+    ".54.10" - so looking for "0.54.10" in the post-extract text finds nothing
+    and the preview UI cannot flag that line as ignored. The ignored lines are
+    therefore located in the pre-extract text, then extract_by_regex is replayed
+    one input line at a time to carry those positions forward.
+    """
+    from changedetectionio import html_tools
+    from changedetectionio.processors.text_json_diff.processor import ContentTransformer
+
+    # Nothing to scan, or nothing to scan for
+    if not (text_pre_extract and ignore_patterns):
+        return []
+
+    hits = html_tools.strip_ignore_text(
+        content=text_pre_extract, wordlist=ignore_patterns, mode='line numbers'
+    )
+    ignored_source_lines = set(hits)
+    if not ignored_source_lines:
+        return []
+
+    # No extract patterns: hand back the pre-extract line numbers unchanged, in order
+    if not extract_patterns:
+        return sorted(ignored_source_lines)
+
+    # Every match extract_by_regex emits is terminated by exactly one '\n', so the
+    # newline count of its output is the number of preview lines this input line yields.
+    mapped_line_numbers = []
+    emitted_so_far = 0
+    for source_line_no, source_line in enumerate(text_pre_extract.splitlines(), start=1):
+        emitted_here = ContentTransformer.extract_by_regex(source_line, extract_patterns).count('\n')
+        if source_line_no in ignored_source_lines:
+            # This line's outputs occupy preview lines emitted_so_far+1 .. emitted_so_far+emitted_here
+            mapped_line_numbers.extend(range(emitted_so_far + 1, emitted_so_far + emitted_here + 1))
+        emitted_so_far += emitted_here
+
+    return mapped_line_numbers
+
+
 def prepare_filter_prevew(datastore, watch_uuid, form_data):
    '''Used by @app.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])'''
    from changedetectionio import forms, html_tools
@@ -50,6 +94,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):

    text_after_filter = ''
    text_before_filter = ''
+    text_pre_extract = ''
    trigger_line_numbers = []
    ignore_line_numbers = []
    blocked_line_numbers = []
@@ -65,6 +110,12 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
        # Only update vars that came in via the AJAX post
        p = {k: v for k, v in form.data.items() if k in form_data.keys()}
        tmp_watch.update(p)
+
+        # Apply llm_intent from form directly — it's not part of processor_text_json_diff_form
+        # but the AJAX sends all visible inputs, so it arrives in form_data
+        if hasattr(form_data, 'get') and 'llm_intent' in form_data:
+            tmp_watch['llm_intent'] = (form_data.get('llm_intent') or '').strip()
+
        blank_watch_no_filters = watch_model(datastore_path=datastore.datastore_path, __datastore=datastore.data)
        blank_watch_no_filters['url'] = tmp_watch.get('url')

@@ -83,15 +134,22 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
            update_handler.fetcher.content = str(decompressed_data) # str() because playwright/puppeteer/requests return string
            update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')

-            # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
+            # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk.
+            # The third task runs with extract_text cleared so we can compute ignore_line_numbers
+            # against the pre-extract text (extract_text transforms lines so post-extract substring
+            # matching for ignore patterns would otherwise fail — see #4138 follow-up).
            # Do this as parallel threads (not processes) to avoid pickle issues with Lock objects
+            tmp_watch_no_extract = deepcopy(tmp_watch)
+            tmp_watch_no_extract['extract_text'] = []
            try:
-                with ThreadPoolExecutor(max_workers=2) as executor:
+                with ThreadPoolExecutor(max_workers=3) as executor:
                    future1 = executor.submit(_task, tmp_watch, update_handler)
                    future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
+                    future3 = executor.submit(_task, tmp_watch_no_extract, update_handler)

                    text_after_filter = future1.result()
                    text_before_filter = future2.result()
+                    text_pre_extract = future3.result()
            except Exception as e:
                x=1

@@ -105,10 +163,11 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):

    try:
        text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', [])
-        ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
-                                                           wordlist=text_to_ignore,
-                                                           mode='line numbers'
-                                                           )
+        ignore_line_numbers = _compute_ignore_line_numbers_for_preview(
+            text_pre_extract=text_pre_extract,
+            ignore_patterns=text_to_ignore,
+            extract_patterns=tmp_watch.get('extract_text', [])
+        )
    except Exception as e:
        text_before_filter = f"Error: {str(e)}"

@@ -120,6 +179,18 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
    except Exception as e:
        text_before_filter = f"Error: {str(e)}"

+    # LLM preview extraction — asks the LLM to directly answer the intent
+    # against the current filtered content (no diff comparison).
+    # e.g. intent "how many articles?" → answer "30 articles listed"
+    # Results are NOT cached back to the real watch.
+    llm_evaluation = None
+    try:
+        from changedetectionio.llm.evaluator import preview_extract
+        if text_after_filter and text_after_filter.strip() not in ('', 'Empty content'):
+            llm_evaluation = preview_extract(tmp_watch, datastore, content=text_after_filter)
+    except Exception as e:
+        logger.warning(f"LLM preview evaluation failed for {watch_uuid}: {e}")
+
    logger.trace(f"Parsed in {time.time() - now:.3f}s")

    return ({
@@ -128,6 +199,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
        'blocked_line_numbers': blocked_line_numbers,
        'duration': time.time() - now,
        'ignore_line_numbers': ignore_line_numbers,
+        'llm_evaluation': llm_evaluation,
        'trigger_line_numbers': trigger_line_numbers,
        })

@@ -7,6 +7,7 @@ a side-by-side or unified diff view with syntax highlighting and change markers.

 import os
 import time
+from flask_babel import gettext
 from loguru import logger

 from changedetectionio import diff, strtobool
@@ -97,6 +98,7 @@ DIFF_PREFERENCES_CONFIG = {
    'added': {'default': True, 'type': 'bool'},
    'replaced': {'default': True, 'type': 'bool'},
    'type': {'default': 'diffLines', 'type': 'value'},
+    'llm_all_changes': {'default': False, 'type': 'bool'},
 }

 def render(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
@@ -154,7 +156,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,

    screenshot_url = watch.get_screenshot()

-    fetcher_supports_screenshots = watch.fetcher_supports_screenshots
+    is_html_webdriver = watch.fetcher_supports_screenshots

    password_enabled_and_share_is_off = False
    if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
@@ -198,6 +200,36 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
    if str(from_version) != str(dates[-2]) or str(to_version) != str(dates[-1]):
        note = 'Note: You are not viewing the latest changes.'

+    llm_configured = bool(
+        datastore.data.get('settings', {}).get('application', {}).get('llm', {}).get('model')
+    )
+
+    # Load cached AI diff summary for this exact from→to + prompt combination
+    viewing_latest = str(to_version) == str(dates[-1])
+    llm_diff_summary = ''
+    llm_summary_prompt = ''
+    if llm_configured:
+        try:
+            from changedetectionio.llm.evaluator import (
+                get_effective_summary_prompt, build_summary_cache_prompt,
+            )
+            _prompt = get_effective_summary_prompt(watch, datastore)
+            llm_summary_prompt = _prompt
+            # Must match the cache_prompt the worker writes and the UI ajax route reads —
+            # using UI default diff prefs so the initial render finds the worker's pre-cache.
+            from changedetectionio.llm.evaluator import get_llm_settings
+            _ls = get_llm_settings(datastore)
+            _max_summary_tokens = _ls.max_summary_tokens
+            _llm_model = _ls.model
+            _cache_prompt = build_summary_cache_prompt(
+                effective_prompt=_prompt,
+                max_summary_tokens=_max_summary_tokens,
+                model=_llm_model,
+            )
+            llm_diff_summary = watch.get_llm_diff_summary(from_version, to_version, prompt=_cache_prompt)
+        except Exception as e:
+            logger.warning(f"Could not load llm-diff-summary for {uuid}: {e}")
+
    output = render_template("diff.html",
                             #initial_scroll_line_number=100,
                             bottom_horizontal_offscreen_contents=offscreen_content,
@@ -205,12 +237,12 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
                             current_diff_url=watch['url'],
                             diff_cell_grid=diff_cell_grid,
                             diff_prefs=diff_prefs,
-                             extra_classes='difference-page',
+                             extra_classes=' '.join(filter(None, ['difference-page', 'llm-configured' if llm_configured else ''])),
                             extra_stylesheets=extra_stylesheets,
-                             extra_title=f" - {watch.label} - History",
+                             extra_title=f" - {watch.label} - {gettext('History')}",
                             extract_form=extract_form,
                             from_version=str(from_version),
-                             fetcher_supports_screenshots=fetcher_supports_screenshots,
+                             is_html_webdriver=is_html_webdriver,
                             last_error=watch['last_error'],
                             last_error_screenshot=watch.get_error_snapshot(),
                             last_error_text=watch.get_error_text(),
@@ -224,5 +256,9 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
                             uuid=uuid,
                             versions=dates,  # All except current/last
                             watch_a=watch,
+                             llm_configured=llm_configured,
+                             llm_diff_summary=llm_diff_summary,
+                             llm_summary_prompt=llm_summary_prompt,
+                             viewing_latest=viewing_latest,
                             )
    return output
--- a/Show More
+++ b/Show More