mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-05-07 02:01:04 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b886b0eafc | |||
| 56b25fa19a | |||
| b9d2c52e12 |
@@ -66,27 +66,27 @@ jobs:
|
||||
echo ${{ github.ref }} > changedetectionio/tag.txt
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Docker Hub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
@@ -103,19 +103,11 @@ jobs:
|
||||
ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=raw,value=dev
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -136,10 +128,10 @@ jobs:
|
||||
echo "Release tag: ${{ github.event.release.tag_name }}"
|
||||
echo "Github ref: ${{ github.ref }}"
|
||||
echo "Github ref name: ${{ github.ref_name }}"
|
||||
|
||||
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta
|
||||
with:
|
||||
images: |
|
||||
@@ -150,20 +142,11 @@ jobs:
|
||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
||||
type=raw,value=latest
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
org.opencontainers.image.version=${{ github.event.release.tag_name }}
|
||||
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
|
||||
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
@@ -60,14 +60,14 @@ jobs:
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
|
||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
|
||||
@@ -20,84 +20,10 @@ jobs:
|
||||
pip install openapi-spec-validator
|
||||
python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"
|
||||
|
||||
lint-translations:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- name: Check .po files with msgfmt
|
||||
run: |
|
||||
sudo apt-get install -y gettext
|
||||
find changedetectionio/translations -name "*.po" | while read f; do
|
||||
echo "Checking $f"
|
||||
msgfmt --check-format -o /dev/null "$f"
|
||||
done
|
||||
- name: Check translation catalog is up-to-date
|
||||
run: |
|
||||
pip install "$(grep -E '^babel==' requirements.txt)"
|
||||
python setup.py extract_messages
|
||||
python setup.py update_catalog
|
||||
python setup.py compile_catalog
|
||||
# Ignore POT-Creation-Date timestamp lines — they change on every extract_messages run
|
||||
if git diff changedetectionio/translations | grep -v 'POT-Creation-Date' | grep -qE '^[+-][^+-]'; then
|
||||
echo "ERROR: Translation catalog is out of sync. Run: python setup.py extract_messages && python setup.py update_catalog && python setup.py compile_catalog"
|
||||
git diff --stat changedetectionio/translations
|
||||
exit 1
|
||||
fi
|
||||
|
||||
lint-template-i18n:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- name: Check for fragmented gettext calls in templates
|
||||
run: |
|
||||
python3 << 'PYEOF'
|
||||
import re, sys
|
||||
from pathlib import Path
|
||||
|
||||
# Detects adjacent {{ _(...) }} calls on the same line separated only by HTML
|
||||
# tags, whitespace, or non-translating Jinja2 variables — the anti-pattern of
|
||||
# splitting a single sentence across multiple msgids.
|
||||
# See https://github.com/dgtlmoon/changedetection.io/issues/4074 for background.
|
||||
#
|
||||
# The correct fix is to consolidate fragments into one entire-sentence msgid,
|
||||
# injecting dynamic values via %(name)s kwargs — per the GNU gettext manual
|
||||
# sections "Entire sentences" and "No string concatenation". See PR #4076 for
|
||||
# worked examples of each consolidation pattern.
|
||||
#
|
||||
# BASELINE: this limit reflects pre-existing violations present when this check
|
||||
# was introduced. It must only ever go DOWN. Each time you fix a template, lower
|
||||
# the limit by the number of lines fixed so the improvement is locked in.
|
||||
# When the count reaches 0, replace the baseline check with a hard sys.exit(1).
|
||||
BASELINE_LIMIT = 44
|
||||
|
||||
FRAGMENT_RE = re.compile(
|
||||
r'\{\{[^{}]*\b_\s*\([^)]*\)[^{}]*\}\}'
|
||||
r'(?:\s*(?:<[^>]+>|\{\{(?![^}]*\b_\s*\()[^}]*\}\})\s*)+'
|
||||
r'\{\{[^{}]*\b_\s*\([^)]*\)[^{}]*\}\}'
|
||||
)
|
||||
|
||||
violations = []
|
||||
for f in sorted(Path('changedetectionio').rglob('*.html')):
|
||||
for lineno, line in enumerate(f.read_text().splitlines(), 1):
|
||||
if FRAGMENT_RE.search(line):
|
||||
violations.append(f"{f}:{lineno}: {line.strip()[:120]}")
|
||||
|
||||
count = len(violations)
|
||||
print(f"Fragmented i18n calls found: {count} (limit: {BASELINE_LIMIT})")
|
||||
for v in violations:
|
||||
print(v)
|
||||
|
||||
if count > BASELINE_LIMIT:
|
||||
print(f"\nERROR: {count} fragmented gettext calls exceed the baseline of {BASELINE_LIMIT}.")
|
||||
print("Consolidate adjacent _() calls into a single entire-sentence msgid.")
|
||||
print("See https://github.com/dgtlmoon/changedetection.io/issues/4074 for patterns.")
|
||||
sys.exit(1)
|
||||
PYEOF
|
||||
|
||||
test-application-3-10:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: [lint-code, lint-translations, lint-template-i18n]
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.10'
|
||||
@@ -105,7 +31,7 @@ jobs:
|
||||
|
||||
test-application-3-11:
|
||||
# Always run
|
||||
needs: [lint-code, lint-translations, lint-template-i18n]
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.11'
|
||||
@@ -113,7 +39,7 @@ jobs:
|
||||
test-application-3-12:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: [lint-code, lint-translations, lint-template-i18n]
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.12'
|
||||
@@ -122,17 +48,8 @@ jobs:
|
||||
test-application-3-13:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: [lint-code, lint-translations, lint-template-i18n]
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.13'
|
||||
skip-pypuppeteer: true
|
||||
|
||||
|
||||
test-application-3-14:
|
||||
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: [lint-code, lint-translations, lint-template-i18n]
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.14'
|
||||
skip-pypuppeteer: false
|
||||
skip-pypuppeteer: true
|
||||
@@ -42,10 +42,10 @@ jobs:
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/test-changedetectionio.tar
|
||||
@@ -88,7 +88,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -99,7 +99,11 @@ jobs:
|
||||
|
||||
- name: Run Unit Tests
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/ tests/llm/'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
@@ -112,7 +116,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -161,14 +165,14 @@ jobs:
|
||||
|
||||
- name: Store test artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Store CLI test output
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||
path: cli-opts-output.txt
|
||||
@@ -184,7 +188,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -226,7 +230,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -266,7 +270,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -302,7 +306,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -330,7 +334,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -456,10 +460,10 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if echo "$BODY" | grep -q 'OK'; then
|
||||
echo "✓ Watch updated successfully (HTTP 200, response: OK)"
|
||||
if echo "$BODY" | grep -q '"ok": *true'; then
|
||||
echo "✓ Watch updated successfully (HTTP 200, ok: true)"
|
||||
else
|
||||
echo "ERROR: Expected response 'OK', got: $BODY"
|
||||
echo "ERROR: Response missing 'ok: true'"
|
||||
echo "Response: $BODY"
|
||||
exit 1
|
||||
fi
|
||||
@@ -500,7 +504,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -540,7 +544,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -570,7 +574,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -583,10 +587,6 @@ jobs:
|
||||
run: |
|
||||
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
|
||||
|
||||
- name: Plugin get_html_head_extras hook injects into base.html
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_html_head_extras.py'
|
||||
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -598,7 +598,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -643,7 +643,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -706,19 +706,7 @@ jobs:
|
||||
- name: Check upgrade works without error
|
||||
run: |
|
||||
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
||||
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
|
||||
g++ \
|
||||
gcc \
|
||||
libc-dev \
|
||||
libffi-dev \
|
||||
libjpeg-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
make \
|
||||
patch \
|
||||
pkg-config \
|
||||
zlib1g-dev
|
||||
|
||||
|
||||
# Checkout old version and create datastore
|
||||
git checkout 0.49.1
|
||||
python3 -m venv .venv
|
||||
@@ -727,7 +715,7 @@ jobs:
|
||||
pip install 'pyOpenSSL>=23.2.0'
|
||||
|
||||
echo "=== Running version 0.49.1 to create datastore ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true python3 ./changedetection.py -C -d /tmp/data &
|
||||
python3 ./changedetection.py -C -d /tmp/data &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready
|
||||
@@ -775,7 +763,7 @@ jobs:
|
||||
pip install -r requirements.txt
|
||||
|
||||
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
||||
TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
||||
|
||||
echo "=== Upgrade test output ==="
|
||||
cat /tmp/upgrade-test.log
|
||||
@@ -783,7 +771,7 @@ jobs:
|
||||
|
||||
# Now start the current version normally to verify the tag survived
|
||||
echo "=== Starting current version to verify tag exists after upgrade ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
||||
timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready and fetch UI
|
||||
@@ -832,7 +820,7 @@ jobs:
|
||||
|
||||
- name: Upload upgrade test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/upgrade-test.log
|
||||
|
||||
@@ -22,20 +22,6 @@ Ideal for monitoring price changes, content edits, conditional changes and more.
|
||||
- Get started watching and receiving website change notifications straight away.
|
||||
- See our [tutorials and how-to page for more inspiration](https://changedetection.io/tutorials)
|
||||
|
||||
## AI-powered website change detection — smart alerts and plain-language summaries
|
||||
|
||||
Stop drowning in noise. Connect any LLM (OpenAI, Gemini, Anthropic, Ollama and more) and go from _"something changed"_ to _"only the thing you care about changed"_.
|
||||
|
||||
**AI change detection rules** — write a plain-English intent once: _"notify me only when the price drops below $50"_, _"alert me when the item comes back in stock"_, _"ignore navigation and footer changes"_. The AI evaluates every detected diff against your intent and silently suppresses everything irrelevant. Fewer false positives, zero noise.
|
||||
|
||||
**AI change summaries** — instead of staring at a raw diff, your notification reads _"Price dropped from $89.99 to $67.00"_ or _"3 new products added to the listing"_. Works globally or per-watch, with full control over the prompt.
|
||||
|
||||
Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with Ollama. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
|
||||
|
||||
[<img src="./docs/LLM-change-summary.jpeg" style="max-width:100%;" alt="AI-powered website change detection — plain language change summaries and smart alert rules" title="AI website change detection with LLM change summaries and intelligent alert filtering" />](https://changedetection.io?src=github)
|
||||
|
||||
_Note: Available in our subscription/hosted service from June 2026_
|
||||
|
||||
### Target specific parts of the webpage using the Visual Selector tool.
|
||||
|
||||
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)
|
||||
@@ -322,27 +308,9 @@ I offer commercial support, this software is depended on by network security, ae
|
||||
[release-link]: https://github.com/dgtlmoon/changedetection.io/releases
|
||||
[docker-link]: https://hub.docker.com/r/dgtlmoon/changedetection.io
|
||||
|
||||
## Commercial Licencing
|
||||
|
||||
## Disclaimer
|
||||
|
||||
**This software is provided "as-is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the software or the use or other dealings in the software.**
|
||||
|
||||
### Website content monitoring
|
||||
|
||||
You are solely responsible for ensuring that your use of this software complies with the terms of service, `robots.txt` directives, access policies, and all applicable laws of any website or service you choose to monitor. The authors and contributors of this software accept no liability whatsoever for how you choose to use it or for any consequences arising from that use.
|
||||
|
||||
### AI / LLM features
|
||||
|
||||
If you choose to enable AI / LLM features, content detected on monitored websites — including page diffs and extracted text — will be transmitted to a third-party AI provider of your choosing, outside of this installation. You are solely responsible for:
|
||||
|
||||
- Ensuring such transmission is permitted by the terms of service of every website you monitor.
|
||||
- Compliance with all applicable data-protection and privacy laws (including but not limited to GDPR) with respect to any personal data that may appear in monitored content.
|
||||
- All API costs and charges levied by your chosen AI provider. This software has no visibility into or control over those charges.
|
||||
- Any consequences arising from acting on AI-generated output.
|
||||
|
||||
**AI and LLM models are known to hallucinate** — producing plausible-sounding but factually incorrect, incomplete, or entirely fabricated output with apparent confidence. By design, LLMs may also omit or silently truncate relevant information during summarisation. **AI output must never be relied upon as complete or accurate.**
|
||||
|
||||
By using this software, and in particular any AI / LLM features, you personally indemnify and hold harmless the author(s), contributor(s), and any associated parties from and against any and all claims, damages, losses, costs, and expenses (including reasonable legal fees) arising out of or in connection with your use of this software.
|
||||
If you are reselling this software either in part or full as part of any commercial arrangement, you must abide by our COMMERCIAL_LICENCE.md found in our code repository, please contact dgtlmoon@gmail.com and contact@changedetection.io .
|
||||
|
||||
## Third-party licenses
|
||||
|
||||
@@ -352,6 +320,4 @@ changedetectionio.html_tools.elementpath_tostring: Copyright (c), 2018-2021, SIS
|
||||
|
||||
Recognition of fantastic contributors to the project
|
||||
|
||||
<sub>Developer note: see [translation guide](changedetectionio/translations/README.md) for i18n template patterns and workflow.</sub>
|
||||
|
||||
- Constantin Hong https://github.com/Constantin1489
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
[python: **.py]
|
||||
keywords = _ _l gettext pgettext:1c,2
|
||||
keywords = _:1,_l:1,gettext:1
|
||||
|
||||
[jinja2: **/templates/**.html]
|
||||
encoding = utf-8
|
||||
keywords = _ _l gettext pgettext:1c,2
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.10'
|
||||
__version__ = '0.53.2'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
@@ -61,22 +61,8 @@ import time
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||
try:
|
||||
import ctypes as _ctypes
|
||||
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
@@ -400,11 +386,8 @@ def main():
|
||||
datastore.data['settings']['application']['all_paused'] = all_paused
|
||||
logger.info(f"Setting all watches paused: {all_paused}")
|
||||
|
||||
# Register built-in restock plugins (deferred here to avoid circular imports at module load time)
|
||||
from changedetectionio.pluggy_interface import inject_datastore_into_plugins, register_builtin_restock_plugins
|
||||
register_builtin_restock_plugins()
|
||||
|
||||
# Inject datastore into plugins that need access to settings
|
||||
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
|
||||
inject_datastore_into_plugins(datastore)
|
||||
|
||||
# Step 1: Add URLs with their options (if provided via -u flags)
|
||||
@@ -627,14 +610,12 @@ def main():
|
||||
|
||||
@app.context_processor
|
||||
def inject_template_globals():
|
||||
from changedetectionio.llm.evaluator import get_llm_config as _get_llm_config
|
||||
return dict(right_sticky="v"+__version__,
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False,
|
||||
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
|
||||
all_paused=datastore.data['settings']['application'].get('all_paused', False),
|
||||
all_muted=datastore.data['settings']['application'].get('all_muted', False),
|
||||
llm_configured=bool(_get_llm_config(datastore)),
|
||||
all_muted=datastore.data['settings']['application'].get('all_muted', False)
|
||||
)
|
||||
|
||||
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
import functools
|
||||
from flask import make_response
|
||||
from flask_restful import Resource
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _get_spec_yaml():
|
||||
"""Build and cache the merged spec as a YAML string (only serialized once per process)."""
|
||||
import yaml
|
||||
from changedetectionio.api import build_merged_spec_dict
|
||||
return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
|
||||
|
||||
|
||||
class Spec(Resource):
|
||||
def get(self):
|
||||
"""Return the merged OpenAPI spec including all registered processor extensions."""
|
||||
return make_response(
|
||||
_get_spec_yaml(),
|
||||
200,
|
||||
{'Content-Type': 'application/yaml'}
|
||||
)
|
||||
@@ -17,7 +17,7 @@ class Tag(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single tag
|
||||
# curl http://localhost:5000/api/v1/tag/<uuid_str:uuid>
|
||||
# curl http://localhost:5000/api/v1/tag/<string:uuid>
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getTag')
|
||||
def get(self, uuid):
|
||||
@@ -97,6 +97,17 @@ class Tag(Resource):
|
||||
# Delete the tag, and any tag reference
|
||||
del self.datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
# Delete tag.json file if it exists
|
||||
import os
|
||||
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
logger.info(f"Deleted tag.json for tag {uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
|
||||
|
||||
# Remove tag from all watches
|
||||
for watch_uuid, watch in self.datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
@@ -177,13 +188,6 @@ class Tag(Resource):
|
||||
|
||||
new_uuid = self.datastore.add_tag(title=title)
|
||||
if new_uuid:
|
||||
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
||||
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
||||
if extra:
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
||||
if tag:
|
||||
tag.update(extra)
|
||||
tag.commit()
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported tag", 400
|
||||
|
||||
@@ -57,7 +57,7 @@ class Watch(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single watch, excluding the history list (can be large)
|
||||
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>
|
||||
# @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
|
||||
# ?recheck=true
|
||||
@auth.check_token
|
||||
@@ -217,7 +217,7 @@ class WatchHistory(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
# Get a list of available history for a watch by UUID
|
||||
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>/history
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>/history
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchHistory')
|
||||
def get(self, uuid):
|
||||
@@ -338,7 +338,7 @@ class WatchHistoryDiff(Resource):
|
||||
word_diff = True
|
||||
|
||||
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'false'))
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'true'))
|
||||
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
||||
include_removed = strtobool(request.args.get('removed', 'true'))
|
||||
include_added = strtobool(request.args.get('added', 'true'))
|
||||
@@ -349,7 +349,7 @@ class WatchHistoryDiff(Resource):
|
||||
previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
ignore_junk=ignore_whitespace,
|
||||
include_equal=not changes_only,
|
||||
include_equal=changes_only,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
@@ -567,4 +567,4 @@ class CreateWatch(Resource):
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
return list, 200
|
||||
return list, 200
|
||||
@@ -3,18 +3,29 @@ from flask import request, abort
|
||||
from loguru import logger
|
||||
|
||||
@functools.cache
|
||||
def build_merged_spec_dict():
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
import os
|
||||
import yaml # Lazy import - only loaded when API validation is actually used
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
# Possibly for pip3 packages
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
||||
return _openapi_spec
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
|
||||
Each processor can provide an api.yaml file alongside its __init__.py that defines
|
||||
additional schemas (e.g., processor_config_restock_diff). These are merged into
|
||||
WatchBase.properties so the spec accurately reflects what the API accepts.
|
||||
|
||||
Plugin processors (via pluggy) are also supported - they just need an api.yaml
|
||||
next to their processor module.
|
||||
|
||||
Returns the merged dict (cached - do not mutate the returned value).
|
||||
Used by Import endpoint to validate and convert query parameters.
|
||||
Returns the YAML dict directly (not the OpenAPI object).
|
||||
"""
|
||||
import os
|
||||
import yaml
|
||||
@@ -24,59 +35,7 @@ def build_merged_spec_dict():
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
|
||||
try:
|
||||
from changedetectionio.processors import find_processors, get_parent_module
|
||||
for module, proc_name in find_processors():
|
||||
parent = get_parent_module(module)
|
||||
if not parent or not hasattr(parent, '__file__'):
|
||||
continue
|
||||
api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
|
||||
if not os.path.exists(api_yaml_path):
|
||||
continue
|
||||
with open(api_yaml_path, 'r', encoding='utf-8') as f:
|
||||
proc_spec = yaml.safe_load(f)
|
||||
# Merge schemas
|
||||
proc_schemas = proc_spec.get('components', {}).get('schemas', {})
|
||||
spec_dict['components']['schemas'].update(proc_schemas)
|
||||
# Inject processor_config_{name} into WatchBase if the schema is defined
|
||||
schema_key = f'processor_config_{proc_name}'
|
||||
if schema_key in proc_schemas:
|
||||
spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
|
||||
'$ref': f'#/components/schemas/{schema_key}'
|
||||
}
|
||||
# Append x-code-samples from processor paths into existing path operations
|
||||
for path, path_item in proc_spec.get('paths', {}).items():
|
||||
if path not in spec_dict.get('paths', {}):
|
||||
continue
|
||||
for method, operation in path_item.items():
|
||||
if method not in spec_dict['paths'][path]:
|
||||
continue
|
||||
if 'x-code-samples' in operation:
|
||||
existing = spec_dict['paths'][path][method].get('x-code-samples', [])
|
||||
spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to merge processor API specs: {e}")
|
||||
|
||||
return spec_dict
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
return OpenAPI.from_dict(build_merged_spec_dict())
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
|
||||
Used by Import endpoint to validate and convert query parameters.
|
||||
Returns the merged YAML dict (not the OpenAPI object).
|
||||
"""
|
||||
return build_merged_spec_dict()
|
||||
return yaml.safe_load(f)
|
||||
|
||||
@functools.cache
|
||||
def _resolve_schema_properties(schema_name):
|
||||
@@ -144,7 +103,6 @@ def validate_openapi_request(operation_id):
|
||||
if request.method.upper() != 'GET':
|
||||
# Lazy import - only loaded when actually validating a request
|
||||
from openapi_core.contrib.flask import FlaskOpenAPIRequest
|
||||
from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError
|
||||
|
||||
spec = get_openapi_spec()
|
||||
openapi_request = FlaskOpenAPIRequest(request)
|
||||
@@ -152,16 +110,6 @@ def validate_openapi_request(operation_id):
|
||||
if result.errors:
|
||||
error_details = []
|
||||
for error in result.errors:
|
||||
# Skip path/server validation errors for reverse proxy compatibility
|
||||
# Flask routing already validates that endpoints exist (returns 404 if not).
|
||||
# OpenAPI validation here is primarily for request body schema validation.
|
||||
# When behind nginx/reverse proxy, URLs may have path prefixes that don't
|
||||
# match the OpenAPI server definitions, causing false positives.
|
||||
if isinstance(error, PathError):
|
||||
logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
|
||||
continue
|
||||
|
||||
error_str = str(error)
|
||||
# Extract detailed schema errors from __cause__
|
||||
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
|
||||
for schema_error in error.__cause__.schema_errors:
|
||||
@@ -169,12 +117,9 @@ def validate_openapi_request(operation_id):
|
||||
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
|
||||
error_details.append(f"{field}: {msg}")
|
||||
else:
|
||||
error_details.append(error_str)
|
||||
|
||||
# Only raise if we have actual validation errors (not path/server issues)
|
||||
if error_details:
|
||||
error_details.append(str(error))
|
||||
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
|
||||
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
|
||||
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
|
||||
except BadRequest:
|
||||
# Re-raise BadRequest exceptions (validation failures)
|
||||
raise
|
||||
@@ -191,6 +136,5 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
|
||||
from .Tags import Tags, Tag
|
||||
from .Import import Import
|
||||
from .SystemInfo import SystemInfo
|
||||
from .Spec import Spec
|
||||
from .Notifications import Notifications
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from loguru import logger
|
||||
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
|
||||
|
||||
|
||||
def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
def create_backup(datastore_path, watches: dict):
|
||||
logger.debug("Creating backup...")
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
@@ -40,14 +40,10 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||
for uuid, tag in (tags or {}).items():
|
||||
for f in Path(tag.data_dir).glob('*'):
|
||||
zipObj.write(f,
|
||||
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||
compress_type=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8)
|
||||
logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")
|
||||
# Add the flask app secret (if it exists)
|
||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||
if os.path.isfile(secret_file):
|
||||
zipObj.write(secret_file, arcname="secret.txt")
|
||||
|
||||
# Add any data in the watch data directory.
|
||||
for uuid, w in watches.items():
|
||||
@@ -92,28 +88,24 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
from .restore import construct_restore_blueprint
|
||||
|
||||
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
|
||||
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
|
||||
backup_threads = []
|
||||
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
def request_backup():
|
||||
if any(thread.is_alive() for thread in backup_threads):
|
||||
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
|
||||
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
# With immediate persistence, all data is already saved
|
||||
zip_thread = threading.Thread(
|
||||
target=create_backup,
|
||||
args=(datastore.datastore_path, datastore.data.get("watching")),
|
||||
kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
|
||||
daemon=True,
|
||||
name="BackupCreator"
|
||||
)
|
||||
@@ -121,7 +113,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
backup_threads.append(zip_thread)
|
||||
flash(gettext("Backup building in background, check back in a few minutes."))
|
||||
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
def find_backups():
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
@@ -141,43 +133,40 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return backup_info
|
||||
|
||||
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
|
||||
def download_backup(filename):
|
||||
import re
|
||||
filename = filename.strip()
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
||||
abort(404)
|
||||
|
||||
# Resolve 'latest' before any validation so checks run against the real filename.
|
||||
if filename == 'latest':
|
||||
backups = find_backups()
|
||||
if not backups:
|
||||
abort(404)
|
||||
filename = backups[0]['filename']
|
||||
|
||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
||||
abort(404)
|
||||
|
||||
logger.debug(f"Backup download request for '{full_path}'")
|
||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||
|
||||
@backups_blueprint.route("/", methods=['GET'])
|
||||
@backups_blueprint.route("/create", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def create():
|
||||
@backups_blueprint.route("", methods=['GET'])
|
||||
def index():
|
||||
backups = find_backups()
|
||||
output = render_template("backup_create.html",
|
||||
output = render_template("overview.html",
|
||||
available_backups=backups,
|
||||
backup_running=any(thread.is_alive() for thread in backup_threads)
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@backups_blueprint.route("/remove-backups", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/remove-backups", methods=['GET'])
|
||||
def remove_backups():
|
||||
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
@@ -187,6 +176,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
flash(gettext("Backups were deleted."))
|
||||
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
return backups_blueprint
|
||||
|
||||
@@ -1,248 +0,0 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
import threading
|
||||
import zipfile
|
||||
|
||||
from flask import Blueprint, render_template, flash, url_for, redirect, request
|
||||
from flask_babel import gettext, lazy_gettext as _l
|
||||
from wtforms import Form, BooleanField, SubmitField
|
||||
from flask_wtf.file import FileField, FileAllowed
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
|
||||
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
|
||||
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
|
||||
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
|
||||
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
|
||||
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
|
||||
_UUID_RE = re.compile(
|
||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
class RestoreForm(Form):
|
||||
zip_file = FileField(_l('Backup zip file'), validators=[
|
||||
FileAllowed(['zip'], _l('Must be a .zip backup file!'))
|
||||
])
|
||||
include_groups = BooleanField(_l('Include groups'), default=True)
|
||||
include_groups_replace_existing = BooleanField(_l('Replace existing groups of the same UUID'), default=True)
|
||||
include_watches = BooleanField(_l('Include watches'), default=True)
|
||||
include_watches_replace_existing = BooleanField(_l('Replace existing watches of the same UUID'), default=True)
|
||||
submit = SubmitField(_l('Restore backup'))
|
||||
|
||||
|
||||
def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace, include_watches, include_watches_replace):
|
||||
"""
|
||||
Extract and import watches and groups from a backup zip stream.
|
||||
|
||||
Mirrors the store's _load_watches / _load_tags loading pattern:
|
||||
- UUID dirs with tag.json → Tag.model + tag_obj.commit()
|
||||
- UUID dirs with watch.json → rehydrate_entity + watch_obj.commit()
|
||||
|
||||
Returns a dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.
|
||||
Raises zipfile.BadZipFile if the stream is not a valid zip.
|
||||
"""
|
||||
from changedetectionio.model import Tag
|
||||
|
||||
restored_groups = 0
|
||||
skipped_groups = 0
|
||||
restored_watches = 0
|
||||
skipped_watches = 0
|
||||
|
||||
current_tags = datastore.data['settings']['application'].get('tags', {})
|
||||
current_watches = datastore.data['watching']
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
logger.debug(f"Restore: extracting zip to {tmpdir}")
|
||||
with zipfile.ZipFile(zip_stream, 'r') as zf:
|
||||
total_uncompressed = sum(m.file_size for m in zf.infolist())
|
||||
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
|
||||
raise ValueError(
|
||||
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
|
||||
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
|
||||
)
|
||||
resolved_dest = os.path.realpath(tmpdir)
|
||||
for member in zf.infolist():
|
||||
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
|
||||
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
|
||||
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
|
||||
zf.extract(member, tmpdir)
|
||||
logger.debug("Restore: zip extracted, scanning UUID directories")
|
||||
|
||||
for entry in os.scandir(tmpdir):
|
||||
if not entry.is_dir():
|
||||
continue
|
||||
|
||||
uuid = entry.name
|
||||
if not _UUID_RE.match(uuid):
|
||||
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
|
||||
continue
|
||||
tag_json_path = os.path.join(entry.path, 'tag.json')
|
||||
watch_json_path = os.path.join(entry.path, 'watch.json')
|
||||
|
||||
# --- Tags (groups) ---
|
||||
if include_groups and os.path.exists(tag_json_path):
|
||||
if uuid in current_tags and not include_groups_replace:
|
||||
logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
|
||||
skipped_groups += 1
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(tag_json_path, 'r', encoding='utf-8') as f:
|
||||
tag_data = json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.error(f"Restore: failed to read tag.json for {uuid}: {e}")
|
||||
continue
|
||||
|
||||
title = tag_data.get('title', uuid)
|
||||
logger.debug(f"Restore: importing group '{title}' ({uuid})")
|
||||
|
||||
# Mirror _load_tags: set uuid and force processor
|
||||
tag_data['uuid'] = uuid
|
||||
tag_data['processor'] = 'restock_diff'
|
||||
|
||||
# Copy the UUID directory so data_dir exists for commit()
|
||||
dst_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
if os.path.exists(dst_dir):
|
||||
shutil.rmtree(dst_dir)
|
||||
shutil.copytree(entry.path, dst_dir)
|
||||
|
||||
tag_obj = Tag.model(
|
||||
datastore_path=datastore.datastore_path,
|
||||
__datastore=datastore.data,
|
||||
default=tag_data
|
||||
)
|
||||
current_tags[uuid] = tag_obj
|
||||
tag_obj.commit()
|
||||
restored_groups += 1
|
||||
logger.success(f"Restore: group '{title}' ({uuid}) restored")
|
||||
|
||||
# --- Watches ---
|
||||
elif include_watches and os.path.exists(watch_json_path):
|
||||
if uuid in current_watches and not include_watches_replace:
|
||||
logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
|
||||
skipped_watches += 1
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(watch_json_path, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.error(f"Restore: failed to read watch.json for {uuid}: {e}")
|
||||
continue
|
||||
|
||||
url = watch_data.get('url', uuid)
|
||||
logger.debug(f"Restore: importing watch '{url}' ({uuid})")
|
||||
|
||||
# Copy UUID directory first so data_dir and history files exist
|
||||
dst_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
if os.path.exists(dst_dir):
|
||||
shutil.rmtree(dst_dir)
|
||||
shutil.copytree(entry.path, dst_dir)
|
||||
|
||||
# Mirror _load_watches / rehydrate_entity
|
||||
watch_data['uuid'] = uuid
|
||||
watch_obj = datastore.rehydrate_entity(uuid, watch_data)
|
||||
current_watches[uuid] = watch_obj
|
||||
watch_obj.commit()
|
||||
restored_watches += 1
|
||||
logger.success(f"Restore: watch '{url}' ({uuid}) restored")
|
||||
|
||||
logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
|
||||
f"watches {restored_watches} restored / {skipped_watches} skipped")
|
||||
|
||||
# Persist changedetection.json (includes the updated tags dict)
|
||||
logger.debug("Restore: committing datastore settings")
|
||||
datastore.commit()
|
||||
|
||||
return {
|
||||
'restored_groups': restored_groups,
|
||||
'skipped_groups': skipped_groups,
|
||||
'restored_watches': restored_watches,
|
||||
'skipped_watches': skipped_watches,
|
||||
}
|
||||
|
||||
|
||||
|
||||
def construct_restore_blueprint(datastore):
|
||||
restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
|
||||
restore_threads = []
|
||||
|
||||
@restore_blueprint.route("/restore", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def restore():
|
||||
form = RestoreForm()
|
||||
return render_template("backup_restore.html",
|
||||
form=form,
|
||||
restore_running=any(t.is_alive() for t in restore_threads),
|
||||
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
|
||||
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
|
||||
|
||||
@restore_blueprint.route("/restore/start", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def backups_restore_start():
|
||||
if any(t.is_alive() for t in restore_threads):
|
||||
flash(gettext("A restore is already running, check back in a few minutes"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
zip_file = request.files.get('zip_file')
|
||||
if not zip_file or not zip_file.filename:
|
||||
flash(gettext("No file uploaded"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
if not zip_file.filename.lower().endswith('.zip'):
|
||||
flash(gettext("File must be a .zip backup file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Reject oversized uploads before reading the stream into memory.
|
||||
content_length = request.content_length
|
||||
if content_length and content_length > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Read into memory now — the request stream is gone once we return.
|
||||
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
|
||||
try:
|
||||
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
|
||||
if len(raw) > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
zip_bytes = io.BytesIO(raw)
|
||||
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
|
||||
pass
|
||||
zip_bytes.seek(0)
|
||||
except zipfile.BadZipFile:
|
||||
flash(gettext("Invalid or corrupted zip file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
include_groups = request.form.get('include_groups') == 'y'
|
||||
include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
|
||||
include_watches = request.form.get('include_watches') == 'y'
|
||||
include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'
|
||||
|
||||
restore_thread = threading.Thread(
|
||||
target=import_from_zip,
|
||||
kwargs={
|
||||
'zip_stream': zip_bytes,
|
||||
'datastore': datastore,
|
||||
'include_groups': include_groups,
|
||||
'include_groups_replace': include_groups_replace,
|
||||
'include_watches': include_watches,
|
||||
'include_watches_replace': include_watches_replace,
|
||||
},
|
||||
daemon=True,
|
||||
name="BackupRestore"
|
||||
)
|
||||
restore_thread.start()
|
||||
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
|
||||
restore_threads.append(restore_thread)
|
||||
flash(gettext("Restore started in background, check back in a few minutes."))
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
return restore_blueprint
|
||||
@@ -1,49 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab active" id=""><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<div id="general">
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<p>
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li>
|
||||
<a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary"
|
||||
href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error "
|
||||
href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -1,61 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||
<li class="tab active"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<div id="general">
|
||||
{% if restore_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A restore is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||
method="POST"
|
||||
enctype="multipart/form-data">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_groups) }}
|
||||
<span class="pure-form-message-inline">{{ _('Include all groups found in backup?') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_groups_replace_existing) }}
|
||||
<span class="pure-form-message-inline">{{ _('Replace any existing groups of the same UUID?') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_watches) }}
|
||||
<span class="pure-form-message-inline">{{ _('Include all watches found in backup?') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_watches_replace_existing) }}
|
||||
<span class="pure-form-message-inline">{{ _('Replace any existing watches of the same UUID?') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.zip_file) }}
|
||||
</div>
|
||||
|
||||
<div class="pure-controls">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Restore backup') }}</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,36 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h2>{{ _('Backups') }}</h2>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{% endblock %}
|
||||
@@ -102,35 +102,6 @@ def run_async_in_browser_loop(coro):
|
||||
else:
|
||||
raise RuntimeError("Browser steps event loop is not available")
|
||||
|
||||
async def _close_session_resources(session_data, label=''):
|
||||
"""Close all browser resources for a session in the correct order.
|
||||
|
||||
browserstepper.cleanup() closes page+context but not the browser itself.
|
||||
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
|
||||
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
|
||||
"""
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
await browserstepper.cleanup()
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up browserstepper{label}: {e}")
|
||||
|
||||
browser = session_data.get('browser')
|
||||
if browser:
|
||||
try:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser{label}: {e}")
|
||||
|
||||
playwright_context = session_data.get('playwright_context')
|
||||
if playwright_context:
|
||||
try:
|
||||
await playwright_context.stop()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error stopping playwright context{label}: {e}")
|
||||
|
||||
|
||||
def cleanup_expired_sessions():
|
||||
"""Remove expired browsersteps sessions and cleanup their resources"""
|
||||
global browsersteps_sessions, browsersteps_watch_to_session
|
||||
@@ -148,10 +119,13 @@ def cleanup_expired_sessions():
|
||||
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
|
||||
session_data = browsersteps_sessions[session_id]
|
||||
|
||||
try:
|
||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||
# Cleanup playwright resources asynchronously
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
run_async_in_browser_loop(browserstepper.cleanup())
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||
|
||||
# Remove from sessions dict
|
||||
del browsersteps_sessions[session_id]
|
||||
@@ -178,10 +152,12 @@ def cleanup_session_for_watch(watch_uuid):
|
||||
|
||||
session_data = browsersteps_sessions.get(session_id)
|
||||
if session_data:
|
||||
try:
|
||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
run_async_in_browser_loop(browserstepper.cleanup())
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||
|
||||
# Remove from sessions dict
|
||||
del browsersteps_sessions[session_id]
|
||||
@@ -202,74 +178,64 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
import time
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# We keep the playwright session open for many minutes
|
||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
|
||||
browsersteps_start_session = {'start_time': time.time()}
|
||||
|
||||
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
|
||||
if proxy_url:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
# Create a new async playwright instance for browser steps
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
|
||||
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
|
||||
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
|
||||
watch = datastore.data['watching'][watch_uuid]
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_name = watch.get_fetch_backend or 'system'
|
||||
if fetcher_name == 'system':
|
||||
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name, None)
|
||||
|
||||
browser = None
|
||||
playwright_context = None
|
||||
|
||||
# If the fetcher has its own browser launch for the live steps UI, use it.
|
||||
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
|
||||
# or None to fall back to the default CDP path.
|
||||
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
|
||||
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
|
||||
if result is not None:
|
||||
browser, playwright_context = result
|
||||
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")
|
||||
|
||||
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
|
||||
if browser is None:
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if '?' not in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if not '?' in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
browsersteps_start_session['browser'] = browser
|
||||
browsersteps_start_session['playwright_context'] = playwright_context
|
||||
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||
if proxy_url:
|
||||
|
||||
# Playwright needs separate username and password values
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
|
||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||
browserstepper = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browser,
|
||||
proxy=proxy,
|
||||
start_url=watch.link,
|
||||
headers=watch.get('headers')
|
||||
start_url=datastore.data['watching'][watch_uuid].link,
|
||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||
)
|
||||
|
||||
# Initialize the async connection
|
||||
await browserstepper.connect(proxy=proxy)
|
||||
|
||||
browsersteps_start_session['browserstepper'] = browserstepper
|
||||
|
||||
# For test
|
||||
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||
|
||||
return browsersteps_start_session
|
||||
|
||||
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
def browsersteps_start_session():
|
||||
# A new session was requested, return sessionID
|
||||
import uuid
|
||||
@@ -304,8 +270,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logger.debug("Starting connection with playwright - done")
|
||||
return {'browsersteps_session_id': browsersteps_session_id}
|
||||
|
||||
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
|
||||
def browser_steps_fetch_screenshot_image():
|
||||
from flask import (
|
||||
make_response,
|
||||
@@ -330,8 +296,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)
|
||||
|
||||
# A request for an action was received
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
def browsersteps_ui_update():
|
||||
import base64
|
||||
|
||||
|
||||
@@ -40,13 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
contents = ''
|
||||
now = time.time()
|
||||
try:
|
||||
import asyncio
|
||||
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid
|
||||
)
|
||||
|
||||
asyncio.run(update_handler.call_browser(preferred_proxy_id=preferred_proxy))
|
||||
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
|
||||
# title, size is len contents not len xfer
|
||||
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 404:
|
||||
@@ -95,13 +94,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return results
|
||||
|
||||
@login_required
|
||||
@check_proxies_blueprint.route("/<uuid_str:uuid>/status", methods=['GET'])
|
||||
@check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
|
||||
def get_recheck_status(uuid):
|
||||
results = _recalc_check_status(uuid=uuid)
|
||||
return results
|
||||
|
||||
@login_required
|
||||
@check_proxies_blueprint.route("/<uuid_str:uuid>/start", methods=['GET'])
|
||||
@check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
|
||||
def start_check(uuid):
|
||||
|
||||
if not datastore.proxy_list:
|
||||
|
||||
@@ -160,7 +160,8 @@ class import_xlsx_wachete(Importer):
|
||||
flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error')
|
||||
return
|
||||
|
||||
for row_id, row in enumerate(wb.active.iter_rows(min_row=2), start=2):
|
||||
row_id = 2
|
||||
for row in wb.active.iter_rows(min_row=row_id):
|
||||
try:
|
||||
extras = {}
|
||||
data = {}
|
||||
@@ -211,6 +212,8 @@ class import_xlsx_wachete(Importer):
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_id), 'error')
|
||||
else:
|
||||
row_id += 1
|
||||
|
||||
flash(gettext("{} imported from Wachete .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
|
||||
|
||||
@@ -238,10 +241,10 @@ class import_xlsx_custom(Importer):
|
||||
|
||||
# @todo cehck atleast 2 rows, same in other method
|
||||
from changedetectionio.forms import validate_url
|
||||
row_i = 0
|
||||
row_i = 1
|
||||
|
||||
try:
|
||||
for row_i, row in enumerate(wb.active.iter_rows(), start=1):
|
||||
for row in wb.active.iter_rows():
|
||||
url = None
|
||||
tags = None
|
||||
extras = {}
|
||||
@@ -292,5 +295,7 @@ class import_xlsx_custom(Importer):
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_i), 'error')
|
||||
else:
|
||||
row_i += 1
|
||||
|
||||
flash(gettext("{} imported from custom .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
|
||||
@@ -9,7 +9,6 @@
|
||||
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
|
||||
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
|
||||
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
|
||||
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -17,15 +16,10 @@
|
||||
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<div class="tab-pane-inner" id="url-list">
|
||||
|
||||
<p>
|
||||
{{ _('Restoring changedetection.io backups is in the') }}<a href="{{ url_for('backups.restore.restore') }}"> {{ _('backups section') }}</a>.
|
||||
<br>
|
||||
</p>
|
||||
<div class="pure-control-group">
|
||||
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
|
||||
<br>
|
||||
<p><strong>{{ _('Example') }}: </strong><code>https://example.com tag1, tag2, last tag</code></p>
|
||||
<p><strong>{{ _('Example:') }} </strong><code>https://example.com tag1, tag2, last tag</code></p>
|
||||
{{ _('URLs which do not pass validation will stay in the textarea.') }}
|
||||
</div>
|
||||
{{ render_field(form.processor, class="processor") }}
|
||||
@@ -43,15 +37,20 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="distill-io">
|
||||
|
||||
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
|
||||
{{ _('This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.')|safe }}
|
||||
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
|
||||
<br>
|
||||
<p>
|
||||
{{ _('How to export?') }} <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
|
||||
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
|
||||
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
||||
font-family:monospace;
|
||||
white-space: pre;
|
||||
@@ -115,7 +114,6 @@
|
||||
</div>
|
||||
</div>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -15,7 +15,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<uuid_str:uuid>/accept", methods=['GET'])
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
|
||||
def accept(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||
@@ -25,7 +25,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<uuid_str:uuid>/reject", methods=['GET'])
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
||||
def reject(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
||||
datastore.data['watching'][uuid].commit()
|
||||
|
||||
@@ -9,12 +9,11 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/watch/<uuid_str:uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
|
||||
def rss_single_watch(uuid):
|
||||
import time
|
||||
|
||||
from flask import make_response, request, Response
|
||||
from flask_babel import lazy_gettext as _l
|
||||
from flask import make_response, request
|
||||
from feedgen.feed import FeedGenerator
|
||||
from loguru import logger
|
||||
|
||||
@@ -43,12 +42,12 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
# Get the watch by UUID
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return Response(_l("Watch with UUID %(uuid)s not found", uuid=uuid), status=404, mimetype='text/plain')
|
||||
return f"Watch with UUID {uuid} not found", 404
|
||||
|
||||
# Check if watch has at least 2 history snapshots
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
return Response(_l("Watch %(uuid)s does not have enough history snapshots to show changes (need at least 2)", uuid=uuid), status=400, mimetype='text/plain')
|
||||
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
|
||||
|
||||
# Get the number of diffs to include (default: 5)
|
||||
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
|
||||
|
||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||
def rss_tag_feed(tag_uuid):
|
||||
|
||||
from flask import make_response, request, url_for
|
||||
|
||||
@@ -13,9 +13,7 @@ from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
from changedetectionio.blueprint.settings.llm import construct_llm_blueprint
|
||||
settings_blueprint = Blueprint('settings', __name__, template_folder="templates")
|
||||
settings_blueprint.register_blueprint(construct_llm_blueprint(datastore), url_prefix='/llm')
|
||||
|
||||
@settings_blueprint.route("", methods=['GET', "POST"])
|
||||
@login_optionally_required
|
||||
@@ -29,23 +27,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
|
||||
default = deepcopy(datastore.data['settings'])
|
||||
|
||||
# Pre-populate LLM sub-form fields from stored config (text fields only —
|
||||
# PasswordField for api_key is intentionally left blank on GET).
|
||||
_stored_llm = datastore.data['settings']['application'].get('llm') or {}
|
||||
default['llm'] = {
|
||||
'llm_model': _stored_llm.get('model', ''),
|
||||
'llm_api_base': _stored_llm.get('api_base', ''),
|
||||
'llm_change_summary_default': datastore.data['settings']['application'].get('llm_change_summary_default', ''),
|
||||
'llm_override_diff_with_summary': datastore.data['settings']['application'].get('llm_override_diff_with_summary', True),
|
||||
'llm_restock_use_fallback_extract': datastore.data['settings']['application'].get('llm_restock_use_fallback_extract', True),
|
||||
'llm_budget_action': datastore.data['settings']['application'].get('llm_budget_action', 'skip_llm'),
|
||||
'llm_thinking_budget': str(datastore.data['settings']['application'].get('llm_thinking_budget', 0)),
|
||||
'llm_max_summary_tokens': str(datastore.data['settings']['application'].get('llm_max_summary_tokens', 3000)),
|
||||
'llm_token_budget_month': _stored_llm.get('token_budget_month', 0),
|
||||
'llm_max_input_chars': _stored_llm.get('max_input_chars', 0),
|
||||
}
|
||||
|
||||
if datastore.proxy_list is not None:
|
||||
available_proxies = list(datastore.proxy_list.keys())
|
||||
# When enabled
|
||||
@@ -95,73 +76,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
datastore.data['settings']['application'].update(app_update)
|
||||
|
||||
# Save LLM config separately under settings.application.llm.
|
||||
# Token counters (tokens_total_cumulative, tokens_this_month, tokens_month_key)
|
||||
# are system-managed and must never be overwritten by form submissions.
|
||||
_LLM_PROTECTED_FIELDS = {
|
||||
'tokens_total_cumulative', 'tokens_this_month', 'tokens_month_key',
|
||||
'cost_usd_total_cumulative', 'cost_usd_this_month',
|
||||
}
|
||||
existing_llm = datastore.data['settings']['application'].get('llm') or {}
|
||||
preserved_counters = {k: v for k, v in existing_llm.items() if k in _LLM_PROTECTED_FIELDS}
|
||||
|
||||
llm_data = form.data.get('llm') or {}
|
||||
|
||||
# PasswordField never re-populates its value on GET, so the submitted value
|
||||
# is only non-empty when the user explicitly typed a new key.
|
||||
# If blank, preserve the existing key so a settings save doesn't accidentally clear it.
|
||||
submitted_api_key = (llm_data.get('llm_api_key') or '').strip()
|
||||
effective_api_key = submitted_api_key if submitted_api_key else existing_llm.get('api_key', '')
|
||||
|
||||
# Application-level LLM settings (survive provider changes)
|
||||
datastore.data['settings']['application']['llm_change_summary_default'] = (
|
||||
llm_data.get('llm_change_summary_default') or ''
|
||||
).strip()
|
||||
datastore.data['settings']['application']['llm_override_diff_with_summary'] = (
|
||||
bool(llm_data.get('llm_override_diff_with_summary', True))
|
||||
)
|
||||
datastore.data['settings']['application']['llm_restock_use_fallback_extract'] = (
|
||||
bool(llm_data.get('llm_restock_use_fallback_extract', True))
|
||||
)
|
||||
datastore.data['settings']['application']['llm_budget_action'] = (
|
||||
llm_data.get('llm_budget_action') or 'skip_llm'
|
||||
)
|
||||
datastore.data['settings']['application']['llm_thinking_budget'] = (
|
||||
int(llm_data.get('llm_thinking_budget') or 0)
|
||||
)
|
||||
datastore.data['settings']['application']['llm_max_summary_tokens'] = (
|
||||
int(llm_data.get('llm_max_summary_tokens') or 3000)
|
||||
)
|
||||
|
||||
# Monthly token budget — only save if env var is not set
|
||||
import os as _os
|
||||
if not _os.getenv('LLM_TOKEN_BUDGET_MONTH', '').strip():
|
||||
_budget = llm_data.get('llm_token_budget_month') or 0
|
||||
existing_llm['token_budget_month'] = int(_budget) if _budget else 0
|
||||
|
||||
# Max input chars — only save if env var is not set
|
||||
if not _os.getenv('LLM_MAX_INPUT_CHARS', '').strip():
|
||||
_max_chars = llm_data.get('llm_max_input_chars') or 0
|
||||
existing_llm['max_input_chars'] = int(_max_chars) if _max_chars else 0
|
||||
|
||||
llm_config = {
|
||||
'model': (llm_data.get('llm_model') or '').strip(),
|
||||
'api_key': effective_api_key,
|
||||
'api_base': (llm_data.get('llm_api_base') or '').strip(),
|
||||
'token_budget_month': existing_llm.get('token_budget_month', 0),
|
||||
'max_input_chars': existing_llm.get('max_input_chars', 0),
|
||||
**preserved_counters,
|
||||
}
|
||||
# Only store if a model is set
|
||||
if llm_config['model']:
|
||||
datastore.data['settings']['application']['llm'] = llm_config
|
||||
else:
|
||||
# Remove model config but retain counters for historical record
|
||||
if preserved_counters:
|
||||
datastore.data['settings']['application']['llm'] = preserved_counters
|
||||
else:
|
||||
datastore.data['settings']['application'].pop('llm', None)
|
||||
|
||||
# Handle dynamic worker count adjustment
|
||||
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
|
||||
new_worker_count = form.data['requests'].get('workers', 1)
|
||||
@@ -250,34 +164,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Instantiate the form with existing settings
|
||||
plugin_forms[plugin_id] = form_class(data=settings)
|
||||
|
||||
from changedetectionio.llm.evaluator import (
|
||||
get_llm_config as _get_llm_cfg,
|
||||
llm_configured_via_env,
|
||||
get_global_token_budget_month,
|
||||
)
|
||||
llm_config = _get_llm_cfg(datastore) or {}
|
||||
llm_env_configured = llm_configured_via_env()
|
||||
llm_stored = datastore.data['settings']['application'].get('llm') or {}
|
||||
llm_token_budget_month = get_global_token_budget_month(datastore)
|
||||
llm_token_budget_month_env = get_global_token_budget_month() # env var only, for readonly logic
|
||||
_max_input_chars_env_str = os.getenv('LLM_MAX_INPUT_CHARS', '').strip()
|
||||
llm_max_input_chars_env = int(_max_input_chars_env_str) if _max_input_chars_env_str.isdigit() else 0
|
||||
from changedetectionio.llm.evaluator import _get_max_input_chars, _DEFAULT_MAX_INPUT_CHARS
|
||||
llm_effective_max_input_chars = _get_max_input_chars(datastore)
|
||||
# Cost display: only when user configured their own key (not hosted/operator-managed)
|
||||
llm_show_costs = not llm_env_configured
|
||||
|
||||
output = render_template("settings.html",
|
||||
active_plugins=active_plugins,
|
||||
api_key=datastore.data['settings']['application'].get('api_access_token'),
|
||||
llm_config=llm_config,
|
||||
llm_env_configured=llm_env_configured,
|
||||
llm_stored=llm_stored,
|
||||
llm_token_budget_month=llm_token_budget_month,
|
||||
llm_token_budget_month_env=llm_token_budget_month_env,
|
||||
llm_max_input_chars_env=llm_max_input_chars_env,
|
||||
llm_effective_max_input_chars=llm_effective_max_input_chars,
|
||||
llm_show_costs=llm_show_costs,
|
||||
python_version=python_version,
|
||||
uptime_seconds=uptime_seconds,
|
||||
available_timezones=sorted(available_timezones()),
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
import os
|
||||
|
||||
from flask import Blueprint, jsonify, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
|
||||
def construct_llm_blueprint(datastore: ChangeDetectionStore):
|
||||
llm_blueprint = Blueprint('llm', __name__)
|
||||
|
||||
@llm_blueprint.route("/models", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def llm_get_models():
|
||||
from flask import request
|
||||
provider = request.args.get('provider', '').strip()
|
||||
api_key = request.args.get('api_key', '').strip()
|
||||
api_base = request.args.get('api_base', '').strip()
|
||||
|
||||
logger.debug(f"LLM model list requested for provider={provider!r} api_base={api_base!r}")
|
||||
|
||||
if not provider:
|
||||
logger.debug("LLM model list: no provider specified, returning 400")
|
||||
return jsonify({'models': [], 'error': 'No provider specified'}), 400
|
||||
|
||||
# Fall back to the stored key if the user hasn't typed one yet
|
||||
if not api_key:
|
||||
api_key = (datastore.data['settings']['application'].get('llm') or {}).get('api_key', '')
|
||||
logger.debug("LLM model list: no api_key in request, using stored key")
|
||||
|
||||
_PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/'}
|
||||
prefix = _PREFIXES.get(provider, '')
|
||||
|
||||
try:
|
||||
import litellm
|
||||
logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} api_base={api_base!r}")
|
||||
raw = litellm.get_valid_models(
|
||||
check_provider_endpoint=True,
|
||||
custom_llm_provider=provider,
|
||||
api_key=api_key or None,
|
||||
api_base=api_base or None,
|
||||
) or []
|
||||
models = sorted({(m if m.startswith(prefix) else prefix + m) for m in raw})
|
||||
logger.debug(f"LLM model list: got {len(models)} models for provider={provider!r}")
|
||||
return jsonify({'models': models, 'error': None})
|
||||
except Exception as e:
|
||||
logger.error(f"LLM model list failed for provider={provider!r}: {e}")
|
||||
logger.exception("LLM model list full traceback:")
|
||||
return jsonify({'models': [], 'error': str(e)}), 400
|
||||
|
||||
@llm_blueprint.route("/test", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def llm_test():
|
||||
from changedetectionio.llm.client import completion
|
||||
|
||||
llm_cfg = datastore.data['settings']['application'].get('llm') or {}
|
||||
model = llm_cfg.get('model', '').strip()
|
||||
api_base = llm_cfg.get('api_base', '') or ''
|
||||
|
||||
logger.debug(f"LLM connection test requested: model={model!r} api_base={api_base!r}")
|
||||
|
||||
if not model:
|
||||
logger.error("LLM connection test failed: no model configured in datastore")
|
||||
return jsonify({'ok': False, 'error': 'No model configured.'}), 400
|
||||
|
||||
try:
|
||||
logger.debug(f"LLM connection test: sending test prompt to model={model!r}")
|
||||
text, total_tokens, input_tokens, output_tokens = completion(
|
||||
model=model,
|
||||
messages=[{'role': 'user', 'content':
|
||||
'Reply with exactly five words confirming you are ready.'}],
|
||||
api_key=llm_cfg.get('api_key') or None,
|
||||
api_base=api_base or None,
|
||||
timeout=20,
|
||||
max_tokens=200,
|
||||
)
|
||||
reply = text.strip()
|
||||
if not reply:
|
||||
logger.warning(
|
||||
f"LLM connection test: model={model!r} responded but returned empty content "
|
||||
f"tokens={total_tokens} (in={input_tokens} out={output_tokens}) — "
|
||||
f"check finish_reason in client debug log above"
|
||||
)
|
||||
return jsonify({'ok': False, 'error': 'Model responded but returned empty content — check server logs.'}), 400
|
||||
|
||||
logger.success(
|
||||
f"LLM connection test OK: model={model!r} "
|
||||
f"tokens={total_tokens} (in={input_tokens} out={output_tokens}) "
|
||||
f"reply={reply!r}"
|
||||
)
|
||||
return jsonify({'ok': True, 'text': reply, 'tokens': total_tokens})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"LLM connection test FAILED: model={model!r} api_base={api_base!r} error={e}")
|
||||
logger.exception("LLM connection test full traceback:")
|
||||
return jsonify({'ok': False, 'error': str(e)}), 400
|
||||
|
||||
@llm_blueprint.route("/clear", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def llm_clear():
|
||||
logger.debug("LLM configuration cleared by user")
|
||||
datastore.data['settings']['application'].pop('llm', None)
|
||||
datastore.commit()
|
||||
flash(gettext("AI / LLM configuration removed."), 'notice')
|
||||
return redirect(url_for('settings.settings_page') + '#ai')
|
||||
|
||||
@llm_blueprint.route("/clear-summary-cache", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def llm_clear_summary_cache():
|
||||
import glob
|
||||
count = 0
|
||||
for watch in datastore.data['watching'].values():
|
||||
if not watch.data_dir:
|
||||
continue
|
||||
for f in glob.glob(os.path.join(watch.data_dir, 'change-summary-*.txt')):
|
||||
try:
|
||||
os.remove(f)
|
||||
logger.info(f"LLM summary cache removed: {f}")
|
||||
count += 1
|
||||
except OSError as e:
|
||||
logger.warning(f"Could not remove LLM summary cache file {f}: {e}")
|
||||
logger.info(f"LLM summary cache cleared: {count} file(s) removed")
|
||||
flash(gettext("AI summary cache cleared (%(n)s file(s) removed).", n=count), 'notice')
|
||||
return redirect(url_for('settings.settings_page') + '#ai')
|
||||
|
||||
return llm_blueprint
|
||||
@@ -9,7 +9,6 @@
|
||||
const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
|
||||
{% endif %}
|
||||
</script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
|
||||
@@ -26,7 +25,7 @@
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
@@ -34,7 +33,6 @@
|
||||
<li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#ai">{{ _('AI / LLM') }}</a></li>
|
||||
<li class="tab"><a href="#info">{{ _('Info') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
@@ -58,7 +56,7 @@
|
||||
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
|
||||
<span class="pure-form-message-inline">{{ _('After this many consecutive times that the CSS/xPath filter is missing, send a notification') }}
|
||||
<br>
|
||||
{{ _('Set to <strong>0</strong> to disable')|safe }}
|
||||
{{ _('Set to') }} <strong>0</strong> {{ _('to disable') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
@@ -120,15 +118,15 @@
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>{{ _('Use the <strong>Basic</strong> method (default) where your watched sites don\'t need Javascript to render.')|safe }}</p>
|
||||
<p>{{ _('The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.')|safe }}</p>
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched sites don\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var') }} 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
|
||||
<br>
|
||||
{{ _('This will wait <i>n</i> seconds before extracting the text.')|safe }}
|
||||
{{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.webdriver_delay) }}
|
||||
@@ -156,8 +154,9 @@
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -197,7 +196,7 @@ nav
|
||||
<span class="pure-form-message-inline">{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</span><br>
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>{{ _('Matching text will be ignored in the text snapshot (you can still see it but it wont trigger a change)') }}</li>
|
||||
<li>{{ _('Matching text will be') }} <strong>{{ _('ignored') }}</strong> {{ _('in the text snapshot (you can still see it but it wont trigger a change)') }}</li>
|
||||
<li>{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</li>
|
||||
<li>{{ _('Each line processed separately, any line matching will be ignored (removed before creating the checksum)') }}</li>
|
||||
<li>{{ _('Regular Expression support, wrap the entire line in forward slash') }} <code>/regex/</code></li>
|
||||
@@ -265,7 +264,7 @@ nav
|
||||
</div>
|
||||
<div>
|
||||
{{ render_field(form.application.form.rss_template_override) }}
|
||||
{{ show_token_placeholders(extra_notification_token_placeholder_info=extra_notification_token_placeholder_info, suffix="-rss", settings_application=settings_application) }}
|
||||
{{ show_token_placeholders(extra_notification_token_placeholder_info=extra_notification_token_placeholder_info, suffix="-rss") }}
|
||||
</div>
|
||||
</div>
|
||||
<br>
|
||||
@@ -353,7 +352,7 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
@@ -394,7 +393,6 @@ nav
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
{% include 'settings_llm_tab.html' %}
|
||||
<div class="tab-pane-inner" id="info">
|
||||
<p><strong>{{ _('Uptime:') }}</strong> {{ uptime_seconds|format_duration }}</p>
|
||||
<p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
|
||||
|
||||
@@ -1,528 +0,0 @@
|
||||
{% from '_helpers.html' import render_field %}
|
||||
{% from '_stab.html' import stab_shell, stab_pane %}
|
||||
{#
|
||||
AI / LLM settings tab content — included from settings.html.
|
||||
Requires template context: form, llm_config, llm_env_configured
|
||||
#}
|
||||
<div class="tab-pane-inner" id="ai">
|
||||
<script src="{{ url_for('static_content', group='js', filename='sub-tabs.js') }}"></script>
|
||||
|
||||
{# TRANSLATORS: 'Usage' here means token consumption/cost stats for the AI provider, not a how-to guide #}
|
||||
{% set _usage_label = pgettext('AI usage stats', 'Usage') %}
|
||||
{% call stab_shell('ai-settings', [
|
||||
{'id': 'overview', 'label': _('Overview'), 'icon': '✦'},
|
||||
{'id': 'provider', 'label': _('Provider'), 'icon': '⚙'},
|
||||
{'id': 'prompts', 'label': _('Prompts'), 'icon': '≡'},
|
||||
{'id': 'behaviour', 'label': _('Behaviour'), 'icon': '⚑'},
|
||||
{'id': 'usage', 'label': _usage_label, 'icon': '$'},
|
||||
]) %}
|
||||
|
||||
{# ── Overview ──────────────────────────────────────────────────────────── #}
|
||||
{% call stab_pane('overview') %}
|
||||
<div class="stab-overview-hero">
|
||||
<h3><span class="stab-overview-glyph">✦</span> {{ _('AI-powered change monitoring') }}</h3>
|
||||
<p>{{ _('Connect an LLM to move from "something changed" to "only the thing you care about changed".') }}</p>
|
||||
</div>
|
||||
|
||||
<div class="stab-overview-features">
|
||||
<div class="stab-overview-feature">
|
||||
<div class="stab-overview-icon">⊞</div>
|
||||
<div class="stab-overview-text">
|
||||
<strong>{{ _('Intent filtering') }}</strong>
|
||||
<p>{{ _('Each watch or tag can carry a plain-text intent — %(ex1)s or %(ex2)s. On every detected change the AI evaluates the diff against it and suppresses irrelevant noise.', ex1='<strong>"notify me only when the price drops"</strong>', ex2='<strong>"alert when the item goes out of stock"</strong>') | safe }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="stab-overview-feature">
|
||||
<div class="stab-overview-icon">≡</div>
|
||||
<div class="stab-overview-text">
|
||||
<strong>{{ _('AI Change Summary') }}</strong>
|
||||
<p>{{ _('Instead of raw diffs, receive plain-language summaries in notifications — %(ex1)s or %(ex2)s. Set a global default prompt here, or override per watch or tag.', ex1='<strong>"Price dropped from $89 to $67"</strong>', ex2='<strong>"3 new items added to the listing"</strong>') | safe }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="stab-overview-feature">
|
||||
<div class="stab-overview-icon">≈</div>
|
||||
<div class="stab-overview-text">
|
||||
<strong>{{ _('Minimal cost') }}</strong>
|
||||
<p>{{ _('The AI sees only a unified diff of what changed — never full page HTML. Low-cost models like %(gpt)s or %(gemini)s handle this well, typically fractions of a cent per check.',
|
||||
gpt='<a href="https://platform.openai.com/api-keys" target="_blank" rel="noopener">gpt-4o-mini</a>',
|
||||
gemini='<a href="https://aistudio.google.com/apikey" target="_blank" rel="noopener">Gemini Flash</a>') | safe }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="stab-overview-cta">
|
||||
{% if llm_config and llm_config.get('model') %}
|
||||
<span class="stab-configured-badge">✓ {{ _('AI / LLM configured:') }} {{ llm_config.get('model') }}</span>
|
||||
{% else %}
|
||||
<button type="button" class="pure-button pure-button-primary" data-stab-goto="provider">
|
||||
⚙ {{ _('Configure AI Provider') }} →
|
||||
</button>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endcall %}
|
||||
|
||||
{# ── Provider ──────────────────────────────────────────────────────────── #}
|
||||
{% call stab_pane('provider') %}
|
||||
<p class="stab-section-title">{{ _('AI Provider') }}</p>
|
||||
|
||||
{% if not llm_env_configured and not (llm_config and llm_config.get('model')) %}
|
||||
<div class="stab-overview-disclaimer">
|
||||
<div class="stab-disclaimer-icon">⚠</div>
|
||||
<div class="stab-disclaimer-body">
|
||||
<strong>{{ _('Third-party data transfer — please read') }}</strong>
|
||||
<p>{{ _('When AI features are active, change data from the websites you monitor — including page diffs and extracted text — is sent to an external AI provider of your choice.') }}</p>
|
||||
<ul>
|
||||
<li>{{ _('You are solely responsible for ensuring this complies with the terms of service of each website you monitor.') }}</li>
|
||||
<li>{{ _("You are solely responsible for compliance with applicable data-protection laws (e.g. GDPR) regarding any personal data that may appear in monitored content.") }}</li>
|
||||
<li>{{ _('API costs charged by your chosen provider are your own responsibility; this software has no visibility into or control over those charges.') }}</li>
|
||||
<li>{{ _('AI / LLM models are known to hallucinate — producing plausible-sounding but factually incorrect or entirely fabricated output with apparent confidence — and by design may omit or truncate relevant data during summarisation. AI output must never be relied upon as complete or accurate. This software is provided as-is with no warranty of any kind.') }}</li>
|
||||
<li>{{ _('By enabling AI features you personally indemnify and hold harmless the creator(s) and contributor(s) of this software from any claims, damages, or liability arising from this data transfer or your use of AI features.') }}</li>
|
||||
</ul>
|
||||
<label class="stab-disclaimer-check">
|
||||
<input type="checkbox" id="llm-disclaimer-accept" onchange="llmDisclaimerToggle(this)">
|
||||
<span>{{ _('I have read and understood the above. I accept full responsibility and indemnify the creator(s) of this software.') }}</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
<div id="llm-provider-fields" style="display:none">
|
||||
{% endif %}
|
||||
|
||||
{% if llm_env_configured %}
|
||||
<div class="inline-warning" style="margin-bottom: 1em;">
|
||||
<img class="inline-warning-icon" src="{{ url_for('static_content', group='images', filename='notice.svg') }}" alt="{{ _('Note') }}">
|
||||
{{ _('AI / LLM is configured via environment variables (<code>LLM_MODEL=%(model)s</code>%(api_base)s). Remove the <code>LLM_MODEL</code> environment variable to configure via this form instead.',
|
||||
model=llm_config.get('model', '')|e,
|
||||
api_base=(', <code>LLM_API_BASE=' ~ (llm_config.get('api_base')|e) ~ '</code>') if llm_config.get('api_base') else '') | safe }}
|
||||
</div>
|
||||
{% else %}
|
||||
|
||||
<div class="pure-control-group">
|
||||
<label for="llm-provider">{{ _('Provider') }}</label>
|
||||
<select id="llm-provider" onchange="llmOnProviderChange(this.value)">
|
||||
<option value="">— {{ _('select a provider') }} —</option>
|
||||
<optgroup label="OpenAI">
|
||||
<option value="openai">OpenAI</option>
|
||||
</optgroup>
|
||||
<optgroup label="Anthropic">
|
||||
<option value="anthropic">Anthropic</option>
|
||||
</optgroup>
|
||||
<optgroup label="Google">
|
||||
<option value="gemini">Google (Gemini)</option>
|
||||
</optgroup>
|
||||
<optgroup label="{{ _('Local / Self-hosted') }}">
|
||||
<option value="ollama">Ollama (local)</option>
|
||||
</optgroup>
|
||||
<optgroup label="OpenRouter">
|
||||
<option value="openrouter">OpenRouter (200+ models)</option>
|
||||
</optgroup>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.llm.form.llm_api_key) }}
|
||||
<span class="pure-form-message-inline" id="llm-key-hint"></span>
|
||||
</div>
|
||||
<div class="pure-control-group" id="llm-base-group" style="display:none">
|
||||
{{ render_field(form.llm.form.llm_api_base) }}
|
||||
<span class="pure-form-message-inline">{{ _('Only needed for Ollama or custom/self-hosted endpoints. Leave blank for cloud providers.') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group" id="llm-fetch-group" style="display:none">
|
||||
<label></label>
|
||||
<button type="button" id="llm-fetch-btn" class="pure-button button-xsmall" onclick="llmFetchModels()"
|
||||
style="background:#27ae60;color:#fff;border:none;">
|
||||
↻ {{ _('Load available models') }}
|
||||
</button>
|
||||
<span id="llm-fetch-status" style="margin-left:.6em;font-size:.85em;color:#888;"></span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group" id="llm-model-select-group" style="display:none">
|
||||
<label for="llm-model-select">{{ _('Available models') }}</label>
|
||||
<select id="llm-model-select" class="pure-input-1-2" onchange="llmOnModelPick(this.value)">
|
||||
<option value="">— {{ _('choose a model') }} —</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.llm.form.llm_model,
|
||||
readonly=True,
|
||||
placeholder=_("Enter API key and click 'Load available models'")) }}
|
||||
</div>
|
||||
|
||||
{% if llm_config and llm_config.get('model') %}
|
||||
<div class="pure-control-group">
|
||||
<label></label>
|
||||
<span style="color:#4a7c59;font-weight:bold;">
|
||||
✓ {{ _('AI / LLM configured:') }} {{ llm_config.get('model') }}
|
||||
</span>
|
||||
|
||||
<a href="{{ url_for('settings.llm.llm_clear') }}"
|
||||
class="pure-button button-xsmall"
|
||||
style="background:#c0392b;color:#fff;"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Remove AI / LLM configuration?') }}"
|
||||
data-confirm-message="<p>{{ _('This will remove your saved AI provider, model, and API key.') }}</p>"
|
||||
data-confirm-button="{{ _('Remove') }}"
|
||||
data-cancel-button="{{ _('Cancel') }}">
|
||||
✕ {{ _('Remove') }}
|
||||
</a>
|
||||
|
||||
<button type="button" id="llm-test-btn" class="pure-button button-xsmall" onclick="llmRunTest()"
|
||||
style="background:#2980b9;color:#fff;border:none;">
|
||||
▶ {{ _('Test connection') }}
|
||||
</button>
|
||||
</div>
|
||||
<div id="llm-test-result" style="display:none; margin-top:0.6em; padding:0.6em 0.85em; border-radius:5px; font-size:0.88em; line-height:1.45;"></div>
|
||||
{% endif %}
|
||||
|
||||
<p class="pure-form-message-inline" style="margin-top:0.5em;">
|
||||
{{ _("Your API key is stored locally and sent only to your chosen provider. On each detected change, the watch's diff and extracted text are sent to the LLM — no full page HTML.") }}
|
||||
</p>
|
||||
|
||||
<div class="pure-control-group" style="margin-top:1.2em; padding-top:1em; border-top:1px solid rgba(128,128,128,0.15);">
|
||||
<label style="color:#888; font-size:0.85em;">{{ _('Cache') }}</label>
|
||||
<a href="{{ url_for('settings.llm.llm_clear_summary_cache') }}"
|
||||
class="pure-button button-xsmall"
|
||||
style="background:#7f8c8d;color:#fff;"
|
||||
data-requires-confirm
|
||||
data-confirm-type="warning"
|
||||
data-confirm-title="{{ _('Clear all summary cache?') }}"
|
||||
data-confirm-message="<p>{{ _('This will remove all cached AI change summaries across all watches.') }}</p><p>{{ _('They will be regenerated on the next check.') }}</p>"
|
||||
data-confirm-button="{{ _('Clear cache') }}"
|
||||
data-cancel-button="{{ _('Cancel') }}">
|
||||
✕ {{ _('Clear all summary cache') }}
|
||||
</a>
|
||||
<span class="pure-form-message-inline">{{ _('Removes all cached AI change summaries across all watches. They will be regenerated on the next check.') }}</span>
|
||||
</div>
|
||||
{% endif %}{# llm_env_configured #}
|
||||
|
||||
{% if not llm_env_configured and not (llm_config and llm_config.get('model')) %}
|
||||
</div>{# llm-provider-fields #}
|
||||
{% endif %}
|
||||
{% endcall %}
|
||||
|
||||
{# ── Prompts ───────────────────────────────────────────────────────────── #}
|
||||
{% call stab_pane('prompts') %}
|
||||
<p class="stab-section-title">{{ _('Default AI Change Summary') }}</p>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.llm.form.llm_change_summary_default) }}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Used for all watches unless overridden by the watch or its tag/group.') }}
|
||||
<a href="#" class="pure-button button-small" onclick="var t=document.getElementById('llm-llm_change_summary_default'); if(!t.value && t.placeholder) t.value=t.placeholder; return false;">{{ _('Modify default prompt') }}</a>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{% endcall %}
|
||||
|
||||
{# ── Behaviour ─────────────────────────────────────────────────────────── #}
|
||||
{% call stab_pane('behaviour') %}
|
||||
<p class="stab-section-title">{{ _('Behaviour') }}</p>
|
||||
|
||||
{% if llm_config and llm_config.get('model') %}
|
||||
<div class="pure-control-group">
|
||||
<label></label>
|
||||
{{ form.llm.form.llm_override_diff_with_summary() }}
|
||||
<label for="{{ form.llm.form.llm_override_diff_with_summary.id }}" style="display:inline; font-weight:normal;">
|
||||
{{ form.llm.form.llm_override_diff_with_summary.label.text }}
|
||||
</label>
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('When enabled, the <code>%(diff)s</code> notification token shows the AI summary instead of the raw diff. Use <code>%(raw_diff)s</code> to always get the original.',
|
||||
diff='{{diff}}', raw_diff='{{raw_diff}}') | safe }}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
<label></label>
|
||||
{{ form.llm.form.llm_restock_use_fallback_extract() }}
|
||||
<label for="{{ form.llm.form.llm_restock_use_fallback_extract.id }}" style="display:inline; font-weight:normal;">
|
||||
{{ form.llm.form.llm_restock_use_fallback_extract.label.text }}
|
||||
</label>
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('When enabled, the AI will be used as a last resort to extract price and stock status from product pages where no structured metadata (JSON-LD, microdata, OpenGraph) is found.') }}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
<label for="{{ form.llm.form.llm_thinking_budget.id }}">{{ form.llm.form.llm_thinking_budget.label.text }}</label>
|
||||
{{ form.llm.form.llm_thinking_budget() }}
|
||||
<span class="pure-form-message-inline">{{ _('For Gemini 2.5+ models only. Thinking tokens improve reasoning quality but count against the output budget. Set to Off if summaries are being cut short.') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
<label for="{{ form.llm.form.llm_max_summary_tokens.id }}">{{ form.llm.form.llm_max_summary_tokens.label.text }}</label>
|
||||
{{ form.llm.form.llm_max_summary_tokens() }}
|
||||
<span class="pure-form-message-inline">{{ _('Upper limit on tokens the AI may use when writing a change summary. Higher values allow longer summaries but cost more.') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
<label>{{ form.llm.form.llm_budget_action.label.text }}</label>
|
||||
<div>
|
||||
{% for subfield in form.llm.form.llm_budget_action %}
|
||||
<label class="pure-radio" style="display:block; font-weight:normal; margin-bottom:0.3em;">
|
||||
{{ subfield() }} {{ subfield.label.text }}
|
||||
</label>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% else %}
|
||||
<p class="pure-form-message-inline" style="margin-top:0.5em;">
|
||||
{{ _('Configure a provider first to unlock behaviour settings.') }}
|
||||
</p>
|
||||
{% endif %}
|
||||
{% endcall %}
|
||||
|
||||
{# ── Usage ─────────────────────────────────────────────────────────────── #}
|
||||
{% call stab_pane('usage') %}
|
||||
<p class="stab-section-title">{{ _('Token & Cost Tracking') }}</p>
|
||||
|
||||
{% if llm_stored.get('tokens_total_cumulative') or llm_stored.get('tokens_this_month') %}
|
||||
|
||||
<div class="llm-usage-grid">
|
||||
<div class="llm-stat-card">
|
||||
<div class="llm-stat-label">{{ _('This month') }}</div>
|
||||
<div class="llm-stat-value">{{ '{:,}'.format(llm_stored.get('tokens_this_month', 0)) }}</div>
|
||||
<div class="llm-stat-sub">{{ _('tokens') }}{% if llm_show_costs and llm_stored.get('cost_usd_this_month') %} · ≈ ${{ '%.4f'|format(llm_stored.get('cost_usd_this_month', 0)) }}{% endif %}</div>
|
||||
{% if llm_token_budget_month %}
|
||||
{% set pct = (llm_stored.get('tokens_this_month', 0) / llm_token_budget_month * 100)|int %}
|
||||
<div class="llm-stat-bar-wrap">
|
||||
<div class="llm-stat-bar-fill {% if pct >= 100 %}bar-over{% elif pct >= 80 %}bar-warn{% else %}bar-ok{% endif %}"
|
||||
style="width:{{ [pct, 100]|min }}%"></div>
|
||||
</div>
|
||||
<div class="llm-stat-budget-text">{{ _('%(percent)s%% of %(budget)s', percent=pct, budget='{:,}'.format(llm_token_budget_month)) }}</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="llm-stat-card">
|
||||
<div class="llm-stat-label">{{ _('All-time total') }}</div>
|
||||
<div class="llm-stat-value">{{ '{:,}'.format(llm_stored.get('tokens_total_cumulative', 0)) }}</div>
|
||||
<div class="llm-stat-sub">{{ _('tokens') }}{% if llm_show_costs and llm_stored.get('cost_usd_total_cumulative') %} · ≈ ${{ '%.4f'|format(llm_stored.get('cost_usd_total_cumulative', 0)) }}{% endif %}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if llm_token_budget_month and llm_stored.get('tokens_this_month', 0) >= llm_token_budget_month %}
|
||||
<p class="llm-budget-alert">⚠ {{ _('Monthly token budget reached. AI summarisation is paused until next month.') }}</p>
|
||||
{% endif %}
|
||||
|
||||
<div class="llm-usage-settings">
|
||||
<div class="llm-usage-row">
|
||||
<span class="llm-usage-row-label">{{ _('Token budget this period') }}</span>
|
||||
<span class="llm-usage-row-value">
|
||||
{% if llm_token_budget_month_env %}
|
||||
<strong>{{ '{:,}'.format(llm_token_budget_month_env) }}</strong>
|
||||
<span class="llm-env-badge">{{ _('(set via <code>LLM_TOKEN_BUDGET_MONTH</code>)') | safe }}</span>
|
||||
<input type="hidden" name="llm-llm_token_budget_month" value="{{ llm_token_budget_month_env }}">
|
||||
{% else %}
|
||||
{{ form.llm.form.llm_token_budget_month(placeholder=_('0 = unlimited'), value=llm_stored.get('token_budget_month', 0) or '') }}
|
||||
<span class="llm-field-hint">{{ _('tokens (0 = unlimited)') }}</span>
|
||||
{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
{% if llm_stored.get('tokens_month_key') %}
|
||||
<div class="llm-usage-row">
|
||||
<span class="llm-usage-row-label">{{ _('Current billing period') }}</span>
|
||||
<span class="llm-usage-row-value">{{ llm_stored.get('tokens_month_key') }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="llm-usage-row">
|
||||
<span class="llm-usage-row-label">{{ _('Max input characters') }}</span>
|
||||
<span class="llm-usage-row-value">
|
||||
{% if llm_max_input_chars_env %}
|
||||
{{ form.llm.form.llm_max_input_chars(value=llm_max_input_chars_env, readonly=True, style="width:10em;opacity:0.6;cursor:not-allowed;") }}
|
||||
<span class="llm-env-badge">{{ _('(set via <code>LLM_MAX_INPUT_CHARS</code>)') | safe }}</span>
|
||||
{% else %}
|
||||
{{ form.llm.form.llm_max_input_chars(placeholder='100000', value=llm_stored.get('max_input_chars', 100000) or '') }}
|
||||
<span class="llm-field-hint">{{ _('characters — currently enforcing: %(n)s', n='{:,}'.format(llm_effective_max_input_chars)) }}</span>
|
||||
{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% else %}
|
||||
<p class="llm-no-usage">{{ _('No AI usage recorded yet.') }}</p>
|
||||
|
||||
<div class="llm-usage-settings">
|
||||
<div class="llm-usage-row">
|
||||
<span class="llm-usage-row-label">{{ _('Token budget') }}</span>
|
||||
<span class="llm-usage-row-value">
|
||||
{% if llm_token_budget_month_env %}
|
||||
<strong>{{ '{:,}'.format(llm_token_budget_month_env) }}</strong>
|
||||
<span class="llm-env-badge">{{ _('(set via <code>LLM_TOKEN_BUDGET_MONTH</code>)') | safe }}</span>
|
||||
<input type="hidden" name="llm-llm_token_budget_month" value="{{ llm_token_budget_month_env }}">
|
||||
{% else %}
|
||||
{{ form.llm.form.llm_token_budget_month(placeholder=_('0 = unlimited'), value=llm_stored.get('token_budget_month', 0) or '') }}
|
||||
<span class="llm-field-hint">{{ _('tokens per month (0 = unlimited)') }}</span>
|
||||
{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
<div class="llm-usage-row">
|
||||
<span class="llm-usage-row-label">{{ _('Max input characters') }}</span>
|
||||
<span class="llm-usage-row-value">
|
||||
{% if llm_max_input_chars_env %}
|
||||
{{ form.llm.form.llm_max_input_chars(value=llm_max_input_chars_env, readonly=True, style="width:10em;opacity:0.6;cursor:not-allowed;") }}
|
||||
<span class="llm-env-badge">{{ _('(set via <code>LLM_MAX_INPUT_CHARS</code>)') | safe }}</span>
|
||||
{% else %}
|
||||
{{ form.llm.form.llm_max_input_chars(placeholder='100000', value=llm_stored.get('max_input_chars', 100000) or '') }}
|
||||
<span class="llm-field-hint">{{ _('characters — currently enforcing: %(n)s', n='{:,}'.format(llm_effective_max_input_chars)) }}</span>
|
||||
{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endcall %}
|
||||
|
||||
{% endcall %}{# stab_shell #}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
(function () {
|
||||
const LIVE_PROVIDERS = ['openai', 'anthropic', 'gemini', 'ollama'];
|
||||
const BASE_DEFAULTS = { ollama: 'http://localhost:11434' };
|
||||
const KEY_HINTS = {
|
||||
openai: '{{ _("platform.openai.com → API keys") }}',
|
||||
anthropic: '{{ _("console.anthropic.com → API keys") }}',
|
||||
gemini: '{{ _("aistudio.google.com → Get API key") }}',
|
||||
ollama: '{{ _("No API key needed for local Ollama") }}',
|
||||
openrouter: '{{ _("openrouter.ai → Keys") }}',
|
||||
};
|
||||
|
||||
window.llmDisclaimerToggle = function (cb) {
|
||||
const fields = document.getElementById('llm-provider-fields');
|
||||
if (fields) fields.style.display = cb.checked ? '' : 'none';
|
||||
};
|
||||
|
||||
window.llmOnProviderChange = function (provider) {
|
||||
const fetchGroup = document.getElementById('llm-fetch-group');
|
||||
const baseGroup = document.getElementById('llm-base-group');
|
||||
const modelSelGrp = document.getElementById('llm-model-select-group');
|
||||
const baseField = document.querySelector('[name="llm-llm_api_base"]');
|
||||
const hint = document.getElementById('llm-key-hint');
|
||||
|
||||
fetchGroup.style.display = LIVE_PROVIDERS.includes(provider) ? '' : 'none';
|
||||
|
||||
const needsBase = provider === 'ollama' || provider === 'openrouter';
|
||||
baseGroup.style.display = needsBase ? '' : 'none';
|
||||
if (BASE_DEFAULTS[provider] !== undefined) {
|
||||
if (!baseField.value) baseField.value = BASE_DEFAULTS[provider];
|
||||
}
|
||||
|
||||
hint.textContent = KEY_HINTS[provider] || '';
|
||||
modelSelGrp.style.display = 'none';
|
||||
document.getElementById('llm-fetch-status').textContent = '';
|
||||
};
|
||||
|
||||
window.llmFetchModels = async function () {
|
||||
const provider = document.getElementById('llm-provider').value;
|
||||
const apiKey = document.querySelector('[name="llm-llm_api_key"]').value.trim();
|
||||
const apiBase = document.querySelector('[name="llm-llm_api_base"]').value.trim();
|
||||
const btn = document.getElementById('llm-fetch-btn');
|
||||
const statusEl = document.getElementById('llm-fetch-status');
|
||||
const selGroup = document.getElementById('llm-model-select-group');
|
||||
const modelSel = document.getElementById('llm-model-select');
|
||||
|
||||
if (!provider) { statusEl.textContent = '{{ _("Select a provider first.") }}'; return; }
|
||||
|
||||
btn.disabled = true;
|
||||
btn.textContent = '⏳ {{ _("Loading…") }}';
|
||||
statusEl.textContent = '';
|
||||
|
||||
const params = new URLSearchParams({ provider });
|
||||
if (apiKey) params.set('api_key', apiKey);
|
||||
if (apiBase) params.set('api_base', apiBase);
|
||||
|
||||
try {
|
||||
const resp = await fetch('{{ url_for("settings.llm.llm_get_models") }}?' + params);
|
||||
const data = await resp.json();
|
||||
|
||||
if (data.error) {
|
||||
statusEl.style.color = '#c0392b';
|
||||
statusEl.textContent = '✗ ' + data.error;
|
||||
selGroup.style.display = 'none';
|
||||
return;
|
||||
}
|
||||
|
||||
if (!data.models || data.models.length === 0) {
|
||||
statusEl.style.color = '#e67e22';
|
||||
statusEl.textContent = '{{ _("No models returned — check your API key.") }}';
|
||||
selGroup.style.display = 'none';
|
||||
return;
|
||||
}
|
||||
|
||||
modelSel.innerHTML = '<option value="">{{ _("— choose a model —") }}</option>';
|
||||
const currentModel = document.querySelector('[name="llm-llm_model"]').value.trim();
|
||||
for (const m of data.models) {
|
||||
const opt = document.createElement('option');
|
||||
opt.value = m;
|
||||
opt.textContent = m;
|
||||
if (m === currentModel) opt.selected = true;
|
||||
modelSel.appendChild(opt);
|
||||
}
|
||||
|
||||
selGroup.style.display = '';
|
||||
statusEl.style.color = '#27ae60';
|
||||
statusEl.textContent = '✓ ' + data.models.length + ' {{ _("models available with your key") }}';
|
||||
} catch (e) {
|
||||
statusEl.style.color = '#c0392b';
|
||||
statusEl.textContent = '✗ {{ _("Request failed") }}: ' + e.message;
|
||||
} finally {
|
||||
btn.disabled = false;
|
||||
btn.textContent = '↻ {{ _("Load available models") }}';
|
||||
}
|
||||
};
|
||||
|
||||
window.llmOnModelPick = function (value) {
|
||||
if (value) document.querySelector('[name="llm-llm_model"]').value = value;
|
||||
};
|
||||
|
||||
window.llmRunTest = async function () {
|
||||
const btn = document.getElementById('llm-test-btn');
|
||||
const result = document.getElementById('llm-test-result');
|
||||
if (!btn || !result) return;
|
||||
|
||||
btn.disabled = true;
|
||||
btn.textContent = '⏳ {{ _("Testing…") }}';
|
||||
result.style.display = 'none';
|
||||
|
||||
try {
|
||||
const resp = await fetch('{{ url_for("settings.llm.llm_test") }}');
|
||||
const data = await resp.json();
|
||||
if (data.ok) {
|
||||
result.style.cssText = 'display:block; background:rgba(39,174,96,0.08); border:1px solid rgba(39,174,96,0.3); border-radius:5px; padding:0.6em 0.85em; font-size:0.88em; line-height:1.45;';
|
||||
result.innerHTML = '<span style="color:#27ae60; font-weight:600;">✓ {{ _("Connected") }}</span>'
|
||||
+ (data.tokens ? ' <span style="opacity:0.55; font-size:0.9em;">(' + data.tokens + ' {{ _("tokens") }})</span>' : '')
|
||||
+ '<br><em style="opacity:0.75;">' + data.text.replace(/</g,'<') + '</em>';
|
||||
} else {
|
||||
result.style.cssText = 'display:block; background:rgba(192,57,43,0.07); border:1px solid rgba(192,57,43,0.25); border-radius:5px; padding:0.6em 0.85em; font-size:0.88em; line-height:1.45;';
|
||||
result.innerHTML = '<span style="color:#c0392b; font-weight:600;">✗ {{ _("Failed") }}</span><br><code style="font-size:0.92em; word-break:break-all;">' + (data.error || '').replace(/</g,'<') + '</code>';
|
||||
}
|
||||
} catch (e) {
|
||||
result.style.cssText = 'display:block; background:rgba(192,57,43,0.07); border:1px solid rgba(192,57,43,0.25); border-radius:5px; padding:0.6em 0.85em; font-size:0.88em;';
|
||||
result.innerHTML = '<span style="color:#c0392b; font-weight:600;">✗ {{ _("Request failed") }}</span>: ' + e.message.replace(/</g,'<');
|
||||
} finally {
|
||||
btn.disabled = false;
|
||||
btn.textContent = '▶ {{ _("Test connection") }}';
|
||||
}
|
||||
};
|
||||
|
||||
// On page load: detect and pre-select provider from current model
|
||||
(function detectCurrentProvider() {
|
||||
const modelField = document.querySelector('[name="llm-llm_model"]');
|
||||
if (!modelField) return;
|
||||
const m = modelField.value.trim();
|
||||
if (!m) return;
|
||||
|
||||
let guessed = '';
|
||||
if (m.startsWith('gemini/')) guessed = 'gemini';
|
||||
else if (m.startsWith('ollama/')) guessed = 'ollama';
|
||||
else if (m.startsWith('openrouter/')) guessed = 'openrouter';
|
||||
else if (m.startsWith('claude')) guessed = 'anthropic';
|
||||
else if (m.startsWith('gpt') || m.startsWith('o1') || m.startsWith('o3')) guessed = 'openai';
|
||||
|
||||
if (guessed) {
|
||||
const sel = document.getElementById('llm-provider');
|
||||
if (sel) { sel.value = guessed; llmOnProviderChange(guessed); }
|
||||
}
|
||||
})();
|
||||
}());
|
||||
</script>
|
||||
@@ -5,7 +5,6 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
from changedetectionio.llm.evaluator import get_llm_config as _get_llm_config
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@@ -23,14 +22,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
|
||||
|
||||
from changedetectionio import processors
|
||||
output = render_template("groups-overview.html",
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
available_tags=sorted_tags,
|
||||
form=add_form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
tag_count=tag_count,
|
||||
wcag_text_color=processors.wcag_text_color,
|
||||
)
|
||||
|
||||
return output
|
||||
@@ -58,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/mute/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def mute(uuid):
|
||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
@@ -67,13 +63,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
tag.commit()
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete(uuid):
|
||||
# Delete the tag from settings immediately
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
del datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
# Delete tag.json file if it exists
|
||||
import os
|
||||
tag_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
logger.info(f"Deleted tag.json for tag {uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
|
||||
|
||||
# Remove tag from all watches in background thread to avoid blocking
|
||||
def remove_tag_background(tag_uuid):
|
||||
"""Background thread to remove tag from watches - discarded after completion."""
|
||||
@@ -94,7 +101,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Tag deleted, removing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/unlink/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/unlink/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def unlink(uuid):
|
||||
# Unlink tag from all watches in background thread to avoid blocking
|
||||
@@ -120,11 +127,19 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@tags_blueprint.route("/delete_all", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete_all():
|
||||
|
||||
# Delete all tag.json files
|
||||
import os
|
||||
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
|
||||
# TagsDict 'del' handler will remove the dir
|
||||
del datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")
|
||||
|
||||
# Clear all tags from settings immediately
|
||||
datastore.data['settings']['application']['tags'] = {}
|
||||
|
||||
# Clear tags from all watches in background thread to avoid blocking
|
||||
def clear_all_tags_background():
|
||||
@@ -145,7 +160,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("All tags deleted, clearing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_tag_edit(uuid):
|
||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||
@@ -164,27 +179,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
default_system_settings = datastore.data['settings'],
|
||||
)
|
||||
|
||||
# Bridge API-stored processor_config_* values into the form's FormField sub-forms.
|
||||
# The API stores processor_config_restock_diff in the tag dict; find the matching
|
||||
# FormField by checking which one's sub-fields cover the config keys.
|
||||
from wtforms.fields.form import FormField as WTFormField
|
||||
for key, value in default.items():
|
||||
if not key.startswith('processor_config_') or not isinstance(value, dict):
|
||||
continue
|
||||
for form_field in form:
|
||||
if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
|
||||
for sub_key, sub_value in value.items():
|
||||
sub_field = form_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
break
|
||||
|
||||
template_args = {
|
||||
'data': default,
|
||||
'form': form,
|
||||
'watch': default,
|
||||
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
|
||||
'llm_configured': bool(_get_llm_config(datastore)),
|
||||
}
|
||||
|
||||
included_content = {}
|
||||
@@ -213,17 +212,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
template = env.from_string(template_str)
|
||||
included_content = template.render(**template_args)
|
||||
|
||||
# Watches whose URL currently matches this tag's pattern
|
||||
matching_watches = {
|
||||
w_uuid: watch
|
||||
for w_uuid, watch in datastore.data['watching'].items()
|
||||
if default.matches_url(watch.get('url', ''))
|
||||
}
|
||||
|
||||
output = render_template("edit-tag.html",
|
||||
extra_form_content=included_content,
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
matching_watches=matching_watches,
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
**template_args
|
||||
)
|
||||
@@ -231,7 +222,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return output
|
||||
|
||||
|
||||
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['POST'])
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def form_tag_edit_submit(uuid):
|
||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||
@@ -264,4 +255,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
def form_tag_delete(uuid):
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
return tags_blueprint
|
||||
|
||||
@@ -2,31 +2,20 @@ from wtforms import (
|
||||
Form,
|
||||
StringField,
|
||||
SubmitField,
|
||||
TextAreaField,
|
||||
validators,
|
||||
)
|
||||
from wtforms.fields.simple import BooleanField
|
||||
from flask_babel import lazy_gettext as _l
|
||||
|
||||
from changedetectionio.processors.restock_diff.forms import processor_settings_form as restock_settings_form
|
||||
from changedetectionio.llm.ui_strings import LLM_INTENT_TAG_PLACEHOLDER
|
||||
from changedetectionio.llm.evaluator import DEFAULT_CHANGE_SUMMARY_PROMPT
|
||||
|
||||
class group_restock_settings_form(restock_settings_form):
|
||||
overrides_watch = BooleanField(_l('Activate for individual watches in this tag/group?'), default=False)
|
||||
url_match_pattern = StringField(_l('Auto-apply to watches with URLs matching'),
|
||||
render_kw={"placeholder": _l("e.g. *://example.com/* or github.com/myorg")})
|
||||
tag_colour = StringField(_l('Tag colour'), default='')
|
||||
llm_intent = TextAreaField('AI Change Intent',
|
||||
validators=[validators.Optional(), validators.Length(max=2000)],
|
||||
render_kw={"rows": "5", "placeholder": LLM_INTENT_TAG_PLACEHOLDER})
|
||||
|
||||
llm_change_summary = TextAreaField('AI Change Summary',
|
||||
validators=[validators.Optional(), validators.Length(max=2000)],
|
||||
render_kw={"rows": "5", "placeholder": DEFAULT_CHANGE_SUMMARY_PROMPT},
|
||||
default='')
|
||||
overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False)
|
||||
|
||||
class SingleTag(Form):
|
||||
|
||||
name = StringField(_l('Tag name'), [validators.InputRequired()], render_kw={"placeholder": _l("Name")})
|
||||
save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
name = StringField('Tag name', [validators.InputRequired()], render_kw={"placeholder": "Name"})
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
|
||||
</script>
|
||||
|
||||
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
|
||||
|
||||
@@ -27,9 +25,6 @@
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#general">{{ _('General') }}</a></li>
|
||||
{% if llm_configured %}
|
||||
<li class="tab"><a href="#ai-llm">{{ _('AI / LLM') }}</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
@@ -48,58 +43,11 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.url_match_pattern, class="m-d") }}
|
||||
<span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
|
||||
</div>
|
||||
{% if matching_watches %}
|
||||
<div class="pure-control-group">
|
||||
<label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
|
||||
<ul class="tag-url-match-list">
|
||||
{% for w_uuid, w in matching_watches.items() %}
|
||||
<li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="pure-control-group">
|
||||
<label>{{ _('Tag colour') }}</label>
|
||||
<div style="display:flex; align-items:center; gap:0.75em;">
|
||||
<input type="checkbox" id="use_custom_colour"
|
||||
{% if data.get('tag_colour') %}checked{% endif %}>
|
||||
<label for="use_custom_colour" style="margin:0">{{ _('Custom colour') }}</label>
|
||||
<input type="color" id="tag_colour_picker"
|
||||
value="{{ data.get('tag_colour') or '#4f8ef7' }}"
|
||||
{% if not data.get('tag_colour') %}disabled{% endif %}>
|
||||
<input type="hidden" name="tag_colour" id="tag_colour_hidden"
|
||||
value="{{ data.get('tag_colour', '') }}">
|
||||
</div>
|
||||
<span class="pure-form-message-inline">{{ _('Leave unchecked to use the auto-generated colour based on the tag name.') }}</span>
|
||||
</div>
|
||||
<script>
|
||||
(function () {
|
||||
var cb = document.getElementById('use_custom_colour');
|
||||
var picker = document.getElementById('tag_colour_picker');
|
||||
var hidden = document.getElementById('tag_colour_hidden');
|
||||
picker.addEventListener('input', function () { hidden.value = this.value; });
|
||||
cb.addEventListener('change', function () {
|
||||
picker.disabled = !this.checked;
|
||||
hidden.value = this.checked ? picker.value : '';
|
||||
});
|
||||
})();
|
||||
</script>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if llm_configured %}
|
||||
<div class="tab-pane-inner" id="ai-llm">
|
||||
{% include "edit/include_llm_intent.html" %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
<p>{{ _('These settings are <strong><i>added</i></strong> to any existing watch configurations.')|safe }}</p>
|
||||
|
||||
<p>{{ _('These settings are') }} <strong><i>{{ _('added') }}</i></strong> {{ _('to any existing watch configurations.') }}</p>
|
||||
{% include "edit/include_subtract.html" %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
@@ -130,7 +78,7 @@
|
||||
{% if has_default_notification_urls %}
|
||||
<div class="inline-warning">
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Look out!') }}" title="{{ _('Lookout!') }}" >
|
||||
{{ _('There are <a href="%(url)s">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications.', url=url_for('settings.settings_page') ~ '#notifications')|safe }}
|
||||
{{ _('There are') }} <a href="{{ url_for('settings.settings_page')}}#notifications">{{ _('system-wide notification URLs enabled') }}</a>, {{ _('this form will override notification settings for this watch only') }} ‐ {{ _('an empty Notification URL list here will still send notifications.') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">{{ _('Use system defaults') }}</a>
|
||||
|
||||
@@ -3,26 +3,6 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
<style>
|
||||
{%- for uuid, tag in available_tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- if tag.get('tag_colour') -%}
|
||||
.watch-tag-list.tag-{{ class_name }} { background-color: {{ tag.tag_colour }}; color: {{ wcag_text_color(tag.tag_colour) }}; }
|
||||
{%- else -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
|
||||
@@ -65,10 +45,10 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{% for uuid, tag in available_tags %}
|
||||
<tr id="{{ uuid }}" class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}">
|
||||
<td class="watch-controls">
|
||||
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('Mute notifications') }}" title="{{ _('Mute notifications') }}" class="icon icon-mute" ></a>
|
||||
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
||||
</td>
|
||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||
<td>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
|
||||
|
||||
@@ -141,7 +141,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
# Import the login decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@ui_blueprint.route("/clear_history/<uuid_str:uuid>", methods=['GET'])
|
||||
@ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def clear_watch_history(uuid):
|
||||
try:
|
||||
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def clear_all_history():
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext', '')
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
||||
if confirmtext == 'clear':
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
@@ -194,9 +194,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
tag_limit = request.args.get('tag')
|
||||
now = int(time.time())
|
||||
|
||||
# Mark watches as viewed - use background thread only for large watch counts
|
||||
def mark_viewed_impl():
|
||||
"""Mark watches as viewed - can run synchronously or in background thread."""
|
||||
# Mark watches as viewed in background thread to avoid blocking
|
||||
def mark_viewed_background():
|
||||
"""Background thread to mark watches as viewed - discarded after completion."""
|
||||
marked_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
@@ -209,21 +209,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
marked_count += 1
|
||||
|
||||
logger.info(f"Marking complete: {marked_count} watches marked as viewed")
|
||||
logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
|
||||
except Exception as e:
|
||||
logger.error(f"Error marking as viewed: {e}")
|
||||
logger.error(f"Error in background mark as viewed: {e}")
|
||||
|
||||
# For small watch counts (< 10), run synchronously to avoid race conditions in tests
|
||||
# For larger counts, use background thread to avoid blocking the UI
|
||||
watch_count = len(datastore.data['watching'])
|
||||
if watch_count < 10:
|
||||
# Run synchronously for small watch counts
|
||||
mark_viewed_impl()
|
||||
else:
|
||||
# Start background thread for large watch counts
|
||||
thread = threading.Thread(target=mark_viewed_impl, daemon=True)
|
||||
thread.start()
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=mark_viewed_background, daemon=True)
|
||||
thread.start()
|
||||
|
||||
flash(gettext("Marking watches as viewed in background..."))
|
||||
return redirect(url_for('watchlist.index', tag=tag_limit))
|
||||
|
||||
@ui_blueprint.route("/delete", methods=['GET'])
|
||||
@@ -366,7 +360,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
|
||||
@ui_blueprint.route("/share-url/<uuid_str:uuid>", methods=['GET'])
|
||||
@ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_share_put_watch(uuid):
|
||||
"""Given a watch UUID, upload the info and return a share-link
|
||||
|
||||
@@ -17,34 +17,6 @@ from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
|
||||
def _clean_litellm_error(exc) -> str:
|
||||
"""Return a short, human-readable error string from a litellm exception.
|
||||
|
||||
litellm embeds the raw provider JSON in str(exc), which can be hundreds of
|
||||
characters of verbose quota detail. We try to pull just the provider's
|
||||
'message' field; failing that we return the first non-empty line with the
|
||||
'litellm.XxxError:' class prefix stripped.
|
||||
"""
|
||||
import json, re
|
||||
raw = str(exc)
|
||||
# Try to parse the embedded JSON block (starts at first '{')
|
||||
brace = raw.find('{')
|
||||
if brace >= 0:
|
||||
try:
|
||||
payload = json.loads(raw[brace:])
|
||||
msg = (payload.get('error') or {}).get('message') or ''
|
||||
if msg:
|
||||
# Take only the first sentence / line — provider messages can be long
|
||||
return msg.split('\n')[0].split('. ')[0].strip() + '.'
|
||||
except Exception:
|
||||
pass
|
||||
# Fallback: strip the "litellm.XxxError: litellm.XxxError: providerException - " prefix
|
||||
first_line = raw.split('\n')[0]
|
||||
first_line = re.sub(r'^(litellm\.\w+:\s*)+', '', first_line)
|
||||
first_line = re.sub(r'\w+Exception\s*-\s*', '', first_line).strip()
|
||||
return first_line or raw.split('\n')[0]
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")
|
||||
|
||||
@@ -94,7 +66,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return Markup(result)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page(uuid):
|
||||
"""
|
||||
@@ -156,169 +128,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/llm-summary/prompt", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_llm_summary_prompt(uuid):
|
||||
"""Return the effective LLM summary prompt for a watch immediately (no LLM call)."""
|
||||
from flask import jsonify
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return jsonify({'prompt': ''}), 404
|
||||
try:
|
||||
from changedetectionio.llm.evaluator import get_effective_summary_prompt
|
||||
prompt = get_effective_summary_prompt(watch, datastore)
|
||||
except Exception:
|
||||
prompt = ''
|
||||
return jsonify({'prompt': prompt})
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/llm-summary", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_llm_summary(uuid):
|
||||
"""
|
||||
Generate (or return cached) an AI summary of the diff between two snapshots.
|
||||
Called via AJAX from the diff page when no cached summary exists.
|
||||
Returns JSON: {"summary": "...", "error": null} or {"summary": null, "error": "..."}
|
||||
"""
|
||||
import difflib
|
||||
from flask import jsonify
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
return jsonify({'summary': None, 'error': 'Watch not found'}), 404
|
||||
|
||||
llm_cfg = datastore.data.get('settings', {}).get('application', {}).get('llm', {})
|
||||
if not llm_cfg.get('model'):
|
||||
return jsonify({'summary': None, 'error': 'LLM not configured'}), 400
|
||||
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
return jsonify({'summary': None, 'error': 'Not enough history'}), 400
|
||||
|
||||
best_from = watch.get_from_version_based_on_last_viewed
|
||||
from_version = request.args.get('from_version', best_from if best_from else dates[-2])
|
||||
to_version = request.args.get('to_version', dates[-1])
|
||||
all_changes = request.args.get('all_changes', '0') == '1'
|
||||
ignore_whitespace = request.args.get('ignore_whitespace', '0') == '1'
|
||||
show_removed = request.args.get('removed', '1') == '1'
|
||||
show_added = request.args.get('added', '1') == '1'
|
||||
|
||||
def _prep(text):
|
||||
"""Optionally normalise whitespace on each line before diffing."""
|
||||
if not ignore_whitespace:
|
||||
return text.splitlines()
|
||||
return [' '.join(line.split()) for line in text.splitlines()]
|
||||
|
||||
def _make_unified_diff(a_text, b_text):
|
||||
lines = list(difflib.unified_diff(_prep(a_text), _prep(b_text), lineterm='', n=3))
|
||||
return '\n'.join(lines[2:]) if len(lines) > 2 else '\n'.join(lines)
|
||||
|
||||
def _apply_filters(diff_text):
|
||||
"""Strip +/- lines the user has hidden in the UI so the LLM matches what they see."""
|
||||
if show_removed and show_added:
|
||||
return diff_text
|
||||
out = []
|
||||
for line in diff_text.splitlines():
|
||||
if line.startswith('-') and not show_removed:
|
||||
continue
|
||||
if line.startswith('+') and not show_added:
|
||||
continue
|
||||
out.append(line)
|
||||
return '\n'.join(out)
|
||||
|
||||
try:
|
||||
from_text = watch.get_history_snapshot(timestamp=from_version)
|
||||
to_text = watch.get_history_snapshot(timestamp=to_version)
|
||||
except Exception as e:
|
||||
return jsonify({'summary': None, 'error': f'Could not read snapshots: {e}'}), 500
|
||||
|
||||
if all_changes:
|
||||
# Build sequential diffs for every intermediate snapshot between from and to
|
||||
# so the LLM sees the full timeline of changes, not just start→end
|
||||
sorted_dates = sorted(dates)
|
||||
try:
|
||||
start_idx = sorted_dates.index(from_version)
|
||||
end_idx = sorted_dates.index(to_version)
|
||||
except ValueError:
|
||||
start_idx, end_idx = 0, len(sorted_dates) - 1
|
||||
|
||||
steps = sorted_dates[start_idx:end_idx + 1]
|
||||
segments = []
|
||||
for i in range(len(steps) - 1):
|
||||
a_ts, b_ts = steps[i], steps[i + 1]
|
||||
try:
|
||||
a_text = watch.get_history_snapshot(timestamp=a_ts) or ''
|
||||
b_text = watch.get_history_snapshot(timestamp=b_ts) or ''
|
||||
except Exception:
|
||||
continue
|
||||
seg = _apply_filters(_make_unified_diff(a_text, b_text))
|
||||
if seg.strip():
|
||||
segments.append(f'=== {a_ts} → {b_ts} ===\n{seg}')
|
||||
|
||||
diff_text = '\n\n'.join(segments) if segments else ''
|
||||
else:
|
||||
diff_text = _apply_filters(_make_unified_diff(from_text, to_text))
|
||||
|
||||
if not diff_text.strip():
|
||||
return jsonify({'summary': None, 'error': 'No differences found'})
|
||||
|
||||
from changedetectionio.llm.evaluator import (
|
||||
summarise_change, get_effective_summary_prompt,
|
||||
is_global_token_budget_exceeded, get_global_token_budget_month,
|
||||
LLMInputTooLargeError,
|
||||
)
|
||||
|
||||
effective_prompt = get_effective_summary_prompt(watch, datastore)
|
||||
from changedetectionio.llm.prompt_builder import build_change_summary_system_prompt
|
||||
# Diff-pref flags + system prompt are part of the cache key so prompt changes bust the cache
|
||||
_max_summary_tokens = datastore.data['settings']['application'].get('llm_max_summary_tokens', 3000)
|
||||
cache_prompt = (
|
||||
effective_prompt
|
||||
+ f'\x00prefs:all={int(all_changes)},ws={int(ignore_whitespace)}'
|
||||
f',rm={int(show_removed)},add={int(show_added)}'
|
||||
+ f'\x00sys:{build_change_summary_system_prompt()}'
|
||||
+ f'\x00max_tokens:{_max_summary_tokens}'
|
||||
)
|
||||
|
||||
# Check cache — keyed by version pair + prompt hash (invalidates if prompt changes)
|
||||
cached = watch.get_llm_diff_summary(from_version, to_version, prompt=cache_prompt)
|
||||
if cached:
|
||||
return jsonify({'summary': cached, 'error': None, 'cached': True})
|
||||
|
||||
# Check global monthly token budget before making an LLM call
|
||||
if is_global_token_budget_exceeded(datastore):
|
||||
budget = get_global_token_budget_month(datastore)
|
||||
llm_cfg = datastore.data.get('settings', {}).get('application', {}).get('llm', {})
|
||||
used = llm_cfg.get('tokens_this_month', 0)
|
||||
return jsonify({
|
||||
'summary': None,
|
||||
'error': gettext(
|
||||
'Monthly AI token budget of %(budget)s tokens reached (%(used)s used). Resets next month.',
|
||||
budget=f'{budget:,}',
|
||||
used=f'{used:,}',
|
||||
),
|
||||
'budget_exceeded': True,
|
||||
}), 429
|
||||
|
||||
try:
|
||||
summary = summarise_change(watch, datastore, diff=diff_text, current_snapshot=to_text)
|
||||
except LLMInputTooLargeError as e:
|
||||
return jsonify({'summary': None, 'error': str(e)}), 400
|
||||
except Exception as e:
|
||||
logger.error(f"LLM summary generation failed for {uuid}: {e}")
|
||||
return jsonify({'summary': None, 'error': _clean_litellm_error(e)}), 500
|
||||
|
||||
if not summary:
|
||||
return jsonify({'summary': None, 'error': 'LLM returned empty summary'})
|
||||
|
||||
try:
|
||||
watch.save_llm_diff_summary(summary, from_version, to_version, prompt=cache_prompt)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not cache llm summary for {uuid}: {e}")
|
||||
|
||||
return jsonify({'summary': summary, 'error': None, 'cached': False})
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_GET(uuid):
|
||||
"""
|
||||
@@ -372,7 +182,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['POST'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_POST(uuid):
|
||||
"""
|
||||
@@ -428,48 +238,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/download-patch", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def download_patch(uuid):
|
||||
"""
|
||||
Generate and return a unified diff patch file between two snapshots.
|
||||
Query params: from_version, to_version (timestamp strings from watch history).
|
||||
Returns the patch as a downloadable .patch file — the same content fed to the LLM.
|
||||
"""
|
||||
import difflib
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
return make_response('Watch not found', 404)
|
||||
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
return make_response('Not enough history', 400)
|
||||
|
||||
from_version = request.args.get('from_version', dates[-2])
|
||||
to_version = request.args.get('to_version', dates[-1])
|
||||
|
||||
try:
|
||||
from_text = watch.get_history_snapshot(timestamp=from_version)
|
||||
to_text = watch.get_history_snapshot(timestamp=to_version)
|
||||
except Exception as e:
|
||||
return make_response(f'Could not read snapshots: {e}', 500)
|
||||
|
||||
diff_lines = list(difflib.unified_diff(
|
||||
from_text.splitlines(keepends=True),
|
||||
to_text.splitlines(keepends=True),
|
||||
fromfile=f'snapshot-{from_version}',
|
||||
tofile=f'snapshot-{to_version}',
|
||||
lineterm='',
|
||||
))
|
||||
patch_text = ''.join(diff_lines) if diff_lines else '(no differences)\n'
|
||||
|
||||
response = make_response(patch_text)
|
||||
response.headers['Content-Type'] = 'text/plain; charset=utf-8'
|
||||
return response
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
|
||||
@@ -10,39 +10,17 @@ from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio.time_handler import is_within_schedule
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.llm.evaluator import get_llm_config as _get_llm_config
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||
|
||||
def _resolve_llm_group_overrides(watch, datastore) -> dict:
|
||||
"""
|
||||
For each LLM field (llm_intent, llm_change_summary): if the watch has no own
|
||||
value but a linked tag does, return {'value': ..., 'group_name': ...} so the
|
||||
edit template can render the textarea as readonly with a group-sourced placeholder.
|
||||
Returns None for each field when the watch has its own value (editable).
|
||||
"""
|
||||
result = {'llm_intent': None, 'llm_change_summary': None}
|
||||
for field in ('llm_intent', 'llm_change_summary'):
|
||||
if (watch.get(field) or '').strip():
|
||||
continue # watch has its own value — editable, no group override
|
||||
for tag_uuid in watch.get('tags', []):
|
||||
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
|
||||
if tag and (tag.get(field) or '').strip():
|
||||
result[field] = {
|
||||
'value': tag.get(field).strip(),
|
||||
'group_name': tag.get('title', 'tag'),
|
||||
}
|
||||
break
|
||||
return result
|
||||
|
||||
|
||||
def _watch_has_tag_options_set(watch):
|
||||
"""This should be fixed better so that Tag is some proper Model, a tag is just a Watch also"""
|
||||
for tag_uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
||||
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
|
||||
return True
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
|
||||
@login_optionally_required
|
||||
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
||||
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
||||
@@ -139,32 +117,19 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||
|
||||
if processor_config:
|
||||
from wtforms.fields.form import FormField
|
||||
# Populate processor-config-* fields from JSON
|
||||
for config_key, config_value in processor_config.items():
|
||||
if not isinstance(config_value, dict):
|
||||
continue
|
||||
# Try exact API-named field first (e.g., processor_config_restock_diff)
|
||||
target_field = getattr(form, f'processor_config_{config_key}', None)
|
||||
# Fallback: find any FormField sub-form whose fields cover config_value keys
|
||||
if target_field is None:
|
||||
for form_field in form:
|
||||
if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
|
||||
target_field = form_field
|
||||
break
|
||||
if target_field is not None:
|
||||
for sub_key, sub_value in config_value.items():
|
||||
sub_field = target_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
|
||||
field_name = f'processor_config_{config_key}'
|
||||
if hasattr(form, field_name):
|
||||
getattr(form, field_name).data = config_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load processor config: {e}")
|
||||
|
||||
for p in datastore.extra_browsers:
|
||||
form.fetch_backend.choices.append(p)
|
||||
|
||||
form.fetch_backend.choices.append(("system", gettext('System settings default')))
|
||||
form.fetch_backend.choices.append(("system", 'System settings default'))
|
||||
|
||||
# form.browser_steps[0] can be assumed that we 'goto url' first
|
||||
|
||||
@@ -172,7 +137,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||
del form.proxy
|
||||
else:
|
||||
form.proxy.choices = [('', gettext('Default'))]
|
||||
form.proxy.choices = [('', 'Default')]
|
||||
for p in datastore.proxy_list:
|
||||
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
|
||||
|
||||
@@ -323,7 +288,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'extra_classes': ' '.join(c),
|
||||
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
|
||||
'extra_processor_config': form.extra_tab_content(),
|
||||
'extra_title': f" - {gettext('Edit')} - {watch.label}",
|
||||
'extra_title': f" - Edit - {watch.label}",
|
||||
'form': form,
|
||||
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
|
||||
@@ -342,15 +307,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch,
|
||||
'capabilities': capabilities,
|
||||
'auto_applied_tags': {
|
||||
tag_uuid: tag
|
||||
for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
|
||||
if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
|
||||
},
|
||||
# LLM intent context
|
||||
'llm_configured': bool(_get_llm_config(datastore)),
|
||||
'llm_group_overrides': _resolve_llm_group_overrides(watch, datastore),
|
||||
'capabilities': capabilities
|
||||
}
|
||||
|
||||
included_content = None
|
||||
@@ -370,7 +327,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
return output
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-html", methods=['GET'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/get-html", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_get_latest_html(uuid):
|
||||
from io import BytesIO
|
||||
@@ -397,7 +354,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Return a 500 error
|
||||
abort(500)
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-data-package", methods=['GET'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/get-data-package", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_get_data_package(uuid):
|
||||
"""Download all data for a single watch as a zip file"""
|
||||
@@ -448,7 +405,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
mimetype='application/zip')
|
||||
|
||||
# Ajax callback
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def watch_get_preview_rendered(uuid):
|
||||
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||
|
||||
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
"""
|
||||
@@ -60,8 +59,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
versions = []
|
||||
timestamp = None
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
triggered_line_numbers = []
|
||||
ignored_line_numbers = []
|
||||
@@ -71,9 +74,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
||||
|
||||
|
||||
preferred_version = request.args.get('version')
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
@@ -107,7 +108,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
current_diff_url=watch['url'],
|
||||
current_version=timestamp,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - {gettext('Diff')} - {watch.label} @ {timestamp}",
|
||||
extra_title=f" - Diff - {watch.label} @ {timestamp}",
|
||||
highlight_ignored_line_numbers=ignored_line_numbers,
|
||||
highlight_triggered_line_numbers=triggered_line_numbers,
|
||||
highlight_blocked_line_numbers=blocked_line_numbers,
|
||||
@@ -124,7 +125,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return output
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
|
||||
@@ -25,8 +25,7 @@
|
||||
<fieldset class="diff-fieldset">
|
||||
{% if versions|length >= 1 %}
|
||||
<span style="white-space: nowrap;">
|
||||
{# TRANSLATORS: 'From' labels the older snapshot version selector on the diff page #}
|
||||
<label id="change-from" for="diff-from-version" class="from-to-label">{{ pgettext('diff version', 'From') }}</label>
|
||||
<label id="change-from" for="diff-from-version" class="from-to-label">{{ _('From') }}</label>
|
||||
<select id="diff-from-version" name="from_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
|
||||
@@ -36,8 +35,7 @@
|
||||
</select>
|
||||
</span>
|
||||
<span style="white-space: nowrap;">
|
||||
{# TRANSLATORS: 'To' labels the newer snapshot version selector on the diff page #}
|
||||
<label id="change-to" for="diff-to-version" class="from-to-label">{{ pgettext('diff version', 'To') }}</label>
|
||||
<label id="change-to" for="diff-to-version" class="from-to-label">{{ _('To') }}</label>
|
||||
<select id="diff-to-version" name="to_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
|
||||
@@ -78,12 +76,6 @@
|
||||
<label for="replaced" class="pure-checkbox" id="label-diff-replaced">
|
||||
<input type="checkbox" id="replaced" name="replaced" {% if diff_prefs.replaced %}checked=""{% endif %}> {{ _('Replaced') }}</label>
|
||||
</span>
|
||||
{%- if llm_configured -%}
|
||||
<span>
|
||||
<label for="llm_all_changes" class="pure-checkbox" id="label-diff-llm-all-changes" title="{{ _('Include all intermediate snapshots between the selected versions in the AI summary') }}">
|
||||
<input type="checkbox" id="llm_all_changes" name="llm_all_changes" {% if diff_prefs.llm_all_changes %}checked=""{% endif %}> ✨ {{ _('AI: every change between versions') }}</label>
|
||||
</span>
|
||||
{%- endif -%}
|
||||
</fieldset>
|
||||
{%- if versions|length >= 2 -%}
|
||||
<div id="keyboard-nav">
|
||||
@@ -132,22 +124,9 @@
|
||||
</div>
|
||||
{%- endif -%}
|
||||
{%- if password_enabled_and_share_is_off -%}
|
||||
<div class="tip">{{ _('Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings.')|safe }}
|
||||
<div class="tip">{{ _('Pro-tip: You can enable') }} <strong>{{ _('"share access when password is enabled"') }}</strong> {{ _('from settings.') }}
|
||||
</div>
|
||||
{%- endif -%}
|
||||
{%- if llm_configured -%}
|
||||
<div id="llm-diff-summary-area"{% if not llm_diff_summary %} data-pending="1"{% endif %}>
|
||||
<span class="llm-diff-summary-label">✨ {{ _('AI Change Summary') }}</span>
|
||||
{%- if llm_diff_summary -%}
|
||||
<p class="llm-diff-summary-text">{{ llm_diff_summary }}</p>
|
||||
{%- else -%}
|
||||
<p class="llm-diff-summary-text llm-diff-summary-loading">{{ _('Generating summary…') }}</p>
|
||||
{%- if llm_summary_prompt -%}
|
||||
<p class="llm-diff-summary-prompt"><span class="llm-diff-summary-prompt-text">{{ llm_summary_prompt }}</span></p>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
</div>
|
||||
{%- endif -%}
|
||||
<div id="text-diff-heading-area" style="user-select: none;">
|
||||
<div class="snapshot-age"><span>{{ from_version|format_timestamp_timeago }}</span>
|
||||
{%- if note -%}<span class="note"><strong>{{ note }}</strong></span>{%- endif -%}
|
||||
@@ -157,7 +136,6 @@
|
||||
<pre id="difference" style="border-left: 2px solid #ddd;">{{ content| diff_unescape_difference_spans }}</pre>
|
||||
<div id="diff-visualiser-area-after" style="user-select: none;">
|
||||
<strong>{{ _('Tip:') }}</strong> {{ _('Highlight text to share or add to ignore lists.') }}
|
||||
— <a href="{{ url_for('ui.ui_diff.download_patch', uuid=uuid, from_version=from_version, to_version=to_version) }}" target="_blank" rel="noopener" style="font-size:0.85em;">{{ _('Download difference patch') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -184,58 +162,5 @@
|
||||
</script>
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
|
||||
|
||||
{% if llm_configured %}
|
||||
<script>
|
||||
$(function () {
|
||||
var $area = $('#llm-diff-summary-area');
|
||||
if (!$area.length || !$area.data('pending')) return;
|
||||
|
||||
var fromVersion = $('#diff-from-version').val();
|
||||
var toVersion = $('#diff-to-version').val();
|
||||
var summaryUrl = "{{ url_for('ui.ui_diff.diff_llm_summary', uuid=uuid) }}";
|
||||
|
||||
function showLlmError(msg) {
|
||||
$area.find('.llm-diff-summary-text')
|
||||
.removeClass('llm-diff-summary-loading')
|
||||
.addClass('llm-error')
|
||||
.text(msg);
|
||||
$area.removeAttr('data-pending');
|
||||
}
|
||||
|
||||
var llmAllChanges = $('#llm_all_changes').is(':checked') ? 1 : 0;
|
||||
var ignoreWhitespace = $('#ignoreWhitespace').is(':checked') ? 1 : 0;
|
||||
var showRemoved = $('#removed').is(':checked') ? 1 : 0;
|
||||
var showAdded = $('#added').is(':checked') ? 1 : 0;
|
||||
$.getJSON(summaryUrl, {
|
||||
from_version: fromVersion,
|
||||
to_version: toVersion,
|
||||
all_changes: llmAllChanges,
|
||||
ignore_whitespace: ignoreWhitespace,
|
||||
removed: showRemoved,
|
||||
added: showAdded,
|
||||
})
|
||||
.done(function (data) {
|
||||
if (data.summary) {
|
||||
$area.find('.llm-diff-summary-text')
|
||||
.removeClass('llm-diff-summary-loading')
|
||||
.text(data.summary);
|
||||
$area.removeAttr('data-pending');
|
||||
} else if (data.error) {
|
||||
showLlmError(data.error);
|
||||
} else {
|
||||
$area.remove();
|
||||
}
|
||||
})
|
||||
.fail(function (xhr) {
|
||||
var resp = xhr.responseJSON;
|
||||
if (resp && resp.error) {
|
||||
showLlmError(resp.error);
|
||||
} else {
|
||||
showLlmError('AI summary request failed (HTTP ' + xhr.status + ').');
|
||||
}
|
||||
});
|
||||
});
|
||||
</script>
|
||||
{% endif %}
|
||||
|
||||
{% endblock %}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
|
||||
@@ -57,7 +57,6 @@
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#ai-llm">{{ _('AI / LLM') }}</a></li>
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||
@@ -82,14 +81,6 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
|
||||
{% if auto_applied_tags %}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Also automatically applied by URL pattern:') }}
|
||||
{% for tag_uuid, tag in auto_applied_tags.items() %}
|
||||
<a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
|
||||
{% endfor %}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
@@ -142,8 +133,8 @@
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>{{ _('Use the <strong>Basic</strong> method (default) where your watched sites don\'t need Javascript to render.')|safe }}</p>
|
||||
<p>{{ _('The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.')|safe }}</p>
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched site doesn\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.') }} </p>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
</span>
|
||||
</div>
|
||||
@@ -164,7 +155,7 @@
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
|
||||
<br>
|
||||
{{ _('This will wait <i>n</i> seconds before extracting the text.')|safe }}
|
||||
{{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
|
||||
{% if using_global_webdriver_wait %}
|
||||
<br><strong>{{ _('Using the current global default settings') }}</strong>
|
||||
{% endif %}
|
||||
@@ -297,7 +288,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{% if has_default_notification_urls %}
|
||||
<div class="inline-warning">
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Look out!') }}" title="{{ _('Lookout!') }}" >
|
||||
{{ _('There are <a href="%(url)s">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications.', url=url_for('settings.settings_page') ~ '#notifications')|safe }}
|
||||
{{ _('There are') }} <a href="{{ url_for('settings.settings_page')}}#notifications">{{ _('system-wide notification URLs enabled') }}</a>, {{ _('this form will override notification settings for this watch only') }} ‐ {{ _('an empty Notification URL list here will still send notifications.') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">{{ _('Use system defaults') }}</a>
|
||||
@@ -321,11 +312,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="ai-llm">
|
||||
{% include "edit/include_llm_intent.html" %}
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||
<div>
|
||||
<div id="edit-text-filter">
|
||||
@@ -351,7 +338,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ render_checkbox_field(form.filter_text_added) }}
|
||||
{{ render_checkbox_field(form.filter_text_replaced) }}
|
||||
{{ render_checkbox_field(form.filter_text_removed) }}
|
||||
<span class="pure-form-message-inline">{{ _('Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.')|safe }}</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('Note: Depending on the length and similarity of the text on each line, the algorithm may consider an') }} <strong>{{ _('addition') }}</strong> {{ _('instead of') }} <strong>{{ _('replacement') }}</strong> {{ _('for example.') }}</span><br>
|
||||
<span class="pure-form-message-inline"> {{ _('So it\'s always better to select') }} <strong>{{ _('Added') }}</strong>+<strong>{{ _('Replaced') }}</strong> {{ _('when you\'re interested in new content.') }}</span><br>
|
||||
<span class="pure-form-message-inline"> {{ _('When content is merely moved in a list, it will also trigger an') }} <strong>{{ _('addition') }}</strong>, {{ _('consider enabling') }} <code><strong>{{ _('Only trigger when unique lines appear') }}</strong></code></span>
|
||||
</fieldset>
|
||||
@@ -365,7 +352,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.sort_text_alphabetically) }}
|
||||
<span class="pure-form-message-inline">{{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.')|safe }}</span>
|
||||
<span class="pure-form-message-inline">{{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with') }} <i>{{ _('check unique lines') }}</i> {{ _('below.') }}</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.trim_text_whitespace) }}
|
||||
@@ -379,20 +366,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
|
||||
</script>
|
||||
<br>
|
||||
{% if llm_configured %}
|
||||
<div id="llm-preview-result" style="display:none; margin-bottom: 0.8em; padding: 0.8em 1.1em; border-radius: 4px; border-left: 4px solid #ccc; font-size: 0.9em;">
|
||||
<div style="font-size:0.75em; text-transform:uppercase; letter-spacing:0.06em; opacity:0.55; margin-bottom:0.35em;">{{ _('AI Intent preview') }}</div>
|
||||
<span class="llm-preview-verdict" style="font-weight: bold;"></span>
|
||||
<div class="llm-preview-answer" style="margin-top: 0.5em; white-space: pre-wrap; line-height: 1.5; font-style: italic;"></div>
|
||||
</div>
|
||||
<style>
|
||||
#llm-preview-result { transition: border-color 0.2s, background 0.2s; }
|
||||
#llm-preview-result[data-found="1"] { border-color: #2ecc71; background: rgba(46,204,113,0.07); }
|
||||
#llm-preview-result[data-found="1"] .llm-preview-verdict { color: #27ae60; }
|
||||
#llm-preview-result[data-found="0"] { border-color: #aaa; background: rgba(0,0,0,0.03); }
|
||||
#llm-preview-result[data-found="0"] .llm-preview-verdict { color: #888; }
|
||||
</style>
|
||||
{% endif %}
|
||||
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
|
||||
<div class="minitabs-wrapper">
|
||||
<div class="minitabs-content">
|
||||
<div id="text-preview-inner" class="monospace-preview">
|
||||
@@ -502,16 +476,6 @@ Math: {{ 1 + 1 }}") }}
|
||||
<td>{{ _('Server type reply') }}</td>
|
||||
<td>{{ watch.get('remote_server_reply') }}</td>
|
||||
</tr>
|
||||
{% if settings_application.get('llm', {}).get('model') %}
|
||||
<tr>
|
||||
<td>{{ _('AI tokens (last check)') }}</td>
|
||||
<td>{{ "{:,}".format(watch.get('llm_last_tokens_used') or 0) }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{{ _('AI tokens (total)') }}</td>
|
||||
<td>{{ "{:,}".format(watch.get('llm_tokens_used_cumulative') or 0) }}</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
@@ -28,7 +28,6 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
|
||||
@@ -26,11 +26,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
from changedetectionio import processors
|
||||
processor = request.form.get('processor', processors.get_default_processor())
|
||||
llm_intent = request.form.get('llm_intent', '').strip()
|
||||
extras = {'paused': add_paused, 'processor': processor}
|
||||
if llm_intent:
|
||||
extras['llm_intent'] = llm_intent
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras=extras)
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
|
||||
if new_uuid:
|
||||
if add_paused:
|
||||
|
||||
@@ -81,12 +81,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
proxy_list = datastore.proxy_list
|
||||
|
||||
from changedetectionio.llm.evaluator import get_llm_config as _get_llm_config
|
||||
from changedetectionio.llm.ui_strings import LLM_INTENT_WATCH_PLACEHOLDER
|
||||
llm_configured = bool(_get_llm_config(datastore))
|
||||
|
||||
output = render_template(
|
||||
"watch-overview.html",
|
||||
active_tag=active_tag,
|
||||
@@ -94,12 +88,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
datastore=datastore,
|
||||
errored_count=errored_count,
|
||||
extra_classes=' '.join(filter(None, ['has-queue' if not update_q.empty() else '', 'llm-configured' if llm_configured else ''])),
|
||||
extra_classes='has-queue' if not update_q.empty() else '',
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
wcag_text_color=processors.wcag_text_color,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=proxy_list,
|
||||
has_proxies=datastore.proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
@@ -114,21 +107,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||
tags=sorted_tags,
|
||||
unread_changes_count=datastore.unread_changes_count,
|
||||
watches=sorted_watches,
|
||||
llm_configured=llm_configured,
|
||||
llm_intent_watch_placeholder=LLM_INTENT_WATCH_PLACEHOLDER,
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
# Return freed template-building memory to the OS immediately.
|
||||
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if session.get('share-link'):
|
||||
del (session['share-link'])
|
||||
|
||||
|
||||
@@ -71,13 +71,6 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
{%- for uuid, tag in tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- if tag.get('tag_colour') -%}
|
||||
.button-tag.tag-{{ class_name }},
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ tag.tag_colour }};
|
||||
color: {{ wcag_text_color(tag.tag_colour) }};
|
||||
}
|
||||
{%- else -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.button-tag.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
@@ -99,7 +92,6 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
<div class="box" id="form-quick-watch-add">
|
||||
@@ -113,16 +105,6 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{{ render_nolabel_field(form.watch_submit_button, title=_("Watch this URL!") ) }}
|
||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title=_("Edit first then Watch") ) }}
|
||||
</div>
|
||||
{% if llm_configured %}
|
||||
<div id="quick-watch-llm-intent" style="display:none; margin-top: 0.5em;">
|
||||
<textarea name="llm_intent"
|
||||
id="quick_watch_llm_intent"
|
||||
rows="2"
|
||||
class="pure-input-1"
|
||||
placeholder="{{ _('AI — Notify when…') }} {{ llm_intent_watch_placeholder }}"
|
||||
></textarea>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div id="watch-group-tag">
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder=_("Watch group / tag"), class="transparent-field") }}
|
||||
</div>
|
||||
@@ -136,14 +118,6 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
</span>
|
||||
</form>
|
||||
</div>
|
||||
{% if llm_configured %}
|
||||
<script>
|
||||
window.watchOverviewI18n = {
|
||||
generatingSummary: {{ _('Generating summary…')|tojson }},
|
||||
gotoHistory: {{ _('Goto full history')|tojson }}
|
||||
};
|
||||
</script>
|
||||
{% endif %}
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
@@ -239,13 +213,12 @@ window.watchOverviewI18n = {
|
||||
{%- set checking_now = is_checking_now(watch) -%}
|
||||
{%- set history_n = watch.history_n -%}
|
||||
{%- set favicon = watch.get_favicon_filename() -%}
|
||||
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||
{%- set row_classes = [
|
||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||
'processor-' ~ watch['processor'],
|
||||
'has-error' if error_texts|length > 2 else '',
|
||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||
'unviewed' if watch.has_unviewed else '',
|
||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||
@@ -263,10 +236,10 @@ window.watchOverviewI18n = {
|
||||
<td class="inline checkbox-uuid" ><div><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
|
||||
<td class="inline watch-controls">
|
||||
<div>
|
||||
<a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="{{ _('Pause checks') }}" title="{{ _('Pause checks') }}" class="icon icon-pause" ></a>
|
||||
<a class="ajax-op state-on pause-toggle" data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="{{ _('UnPause checks') }}" title="{{ _('UnPause checks') }}" class="icon icon-unpause" ></a>
|
||||
<a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('Mute notification') }}" title="{{ _('Mute notification') }}" class="icon icon-mute" ></a>
|
||||
<a class="ajax-op state-on mute-toggle" data-op="mute" style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('UnMute notification') }}" title="{{ _('UnMute notification') }}" class="icon icon-mute" ></a>
|
||||
<a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
|
||||
<a class="ajax-op state-on pause-toggle" data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
|
||||
<a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a>
|
||||
<a class="ajax-op state-on mute-toggle" data-op="mute" style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a>
|
||||
</div>
|
||||
</td>
|
||||
|
||||
@@ -298,7 +271,7 @@ window.watchOverviewI18n = {
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
@@ -310,7 +283,7 @@ window.watchOverviewI18n = {
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="{{ _('Create a link to share watch config with others') }}" ></a>
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
|
||||
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
|
||||
{{ effective_fetcher|fetcher_status_icons }}
|
||||
@@ -332,20 +305,12 @@ window.watchOverviewI18n = {
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock'].get('price') -%}
|
||||
{%- set restock = watch['restock'] -%}
|
||||
{%- set price = restock.get('price') -%}
|
||||
{%- set cur = restock.get('currency','') -%}
|
||||
|
||||
{%- if price is not none and (price|string)|regex_search('\d') -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
|
||||
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
|
||||
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
|
||||
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
|
||||
{{ price }} {{ cur }}<!-- as string -->
|
||||
{%- if watch['restock']['price'] is number -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- else -%} <!-- watch['restock']['price']' is not a number, cant output it -->
|
||||
{%- endif -%}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
{%- endif -%}
|
||||
@@ -353,13 +318,13 @@ window.watchOverviewI18n = {
|
||||
</td>
|
||||
{%- endif -%}
|
||||
{#last_checked becomes fetch-start-time#}
|
||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" data-label="{{ _('Last Checked') }}">
|
||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
|
||||
<div class="spinner-wrapper" style="display:none;" >
|
||||
<span class="spinner"></span><span class="status-text"> {{ _('Checking now') }}</span>
|
||||
</div>
|
||||
<span class="innertext">{{watch|format_last_checked_time|safe}}</span>
|
||||
</td>
|
||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}" data-label="{{ _('Last Changed') }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
|
||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
|
||||
{{watch.last_changed|format_timestamp_timeago}}
|
||||
{%- else -%}
|
||||
{{ _('Not yet') }}
|
||||
@@ -371,7 +336,7 @@ window.watchOverviewI18n = {
|
||||
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">{{ _('Queued') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">{{ _('Recheck') }}</a>
|
||||
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link ai-history-btn" style="display: none;" data-uuid="{{ watch.uuid }}" data-summary-url="{{ url_for('ui.ui_diff.diff_llm_summary', uuid=watch.uuid) }}"><span class="btn-label-history">{{ _('History') }}</span><span class="btn-label-summary">✨ {{ _('Summary') }}</span></a>
|
||||
<a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">{{ _('History') }}</a>
|
||||
<a href="{{ url_for('ui.ui_preview.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">{{ _('Preview') }}</a>
|
||||
</div>
|
||||
</td>
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from json_logic.builtins import BUILTINS
|
||||
from flask_babel import lazy_gettext as _l
|
||||
|
||||
from .exceptions import EmptyConditionRuleRowNotUsable
|
||||
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
|
||||
@@ -7,19 +6,19 @@ from . import default_plugin
|
||||
from loguru import logger
|
||||
# List of all supported JSON Logic operators
|
||||
operator_choices = [
|
||||
(None, _l("Choose one - Operator")),
|
||||
(">", _l("Greater Than")),
|
||||
("<", _l("Less Than")),
|
||||
(">=", _l("Greater Than or Equal To")),
|
||||
("<=", _l("Less Than or Equal To")),
|
||||
("==", _l("Equals")),
|
||||
("!=", _l("Not Equals")),
|
||||
("in", _l("Contains")),
|
||||
(None, "Choose one - Operator"),
|
||||
(">", "Greater Than"),
|
||||
("<", "Less Than"),
|
||||
(">=", "Greater Than or Equal To"),
|
||||
("<=", "Less Than or Equal To"),
|
||||
("==", "Equals"),
|
||||
("!=", "Not Equals"),
|
||||
("in", "Contains"),
|
||||
]
|
||||
|
||||
# Fields available in the rules
|
||||
field_choices = [
|
||||
(None, _l("Choose one - Field")),
|
||||
(None, "Choose one - Field"),
|
||||
]
|
||||
|
||||
# The data we will feed the JSON Rules to see if it passes the test/conditions or not
|
||||
|
||||
@@ -3,7 +3,6 @@ import re
|
||||
import pluggy
|
||||
from price_parser import Price
|
||||
from loguru import logger
|
||||
from flask_babel import lazy_gettext as _l
|
||||
|
||||
hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
|
||||
|
||||
@@ -48,22 +47,22 @@ def register_operators():
|
||||
@hookimpl
|
||||
def register_operator_choices():
|
||||
return [
|
||||
("!in", _l("Does NOT Contain")),
|
||||
("starts_with", _l("Text Starts With")),
|
||||
("ends_with", _l("Text Ends With")),
|
||||
("length_min", _l("Length minimum")),
|
||||
("length_max", _l("Length maximum")),
|
||||
("contains_regex", _l("Text Matches Regex")),
|
||||
("!contains_regex", _l("Text Does NOT Match Regex")),
|
||||
("!in", "Does NOT Contain"),
|
||||
("starts_with", "Text Starts With"),
|
||||
("ends_with", "Text Ends With"),
|
||||
("length_min", "Length minimum"),
|
||||
("length_max", "Length maximum"),
|
||||
("contains_regex", "Text Matches Regex"),
|
||||
("!contains_regex", "Text Does NOT Match Regex"),
|
||||
]
|
||||
|
||||
@hookimpl
|
||||
def register_field_choices():
|
||||
return [
|
||||
("extracted_number", _l("Extracted number after 'Filters & Triggers'")),
|
||||
("extracted_number", "Extracted number after 'Filters & Triggers'"),
|
||||
# ("meta_description", "Meta Description"),
|
||||
# ("meta_keywords", "Meta Keywords"),
|
||||
("page_filtered_text", _l("Page text after 'Filters & Triggers'")),
|
||||
("page_filtered_text", "Page text after 'Filters & Triggers'"),
|
||||
#("page_title", "Page <title>"), # actual page title <title>
|
||||
]
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# Condition Rule Form (for each rule row)
|
||||
from wtforms import Form, SelectField, StringField, validators
|
||||
from wtforms import validators
|
||||
from flask_babel import lazy_gettext as _l
|
||||
|
||||
class ConditionFormRow(Form):
|
||||
|
||||
@@ -9,18 +8,18 @@ class ConditionFormRow(Form):
|
||||
from changedetectionio.conditions import plugin_manager
|
||||
from changedetectionio.conditions import operator_choices, field_choices
|
||||
field = SelectField(
|
||||
_l("Field"),
|
||||
"Field",
|
||||
choices=field_choices,
|
||||
validators=[validators.Optional()]
|
||||
)
|
||||
|
||||
operator = SelectField(
|
||||
_l("Operator"),
|
||||
"Operator",
|
||||
choices=operator_choices,
|
||||
validators=[validators.Optional()]
|
||||
)
|
||||
|
||||
value = StringField(_l("Value"), validators=[validators.Optional()], render_kw={"placeholder": _l("A value")})
|
||||
value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"})
|
||||
|
||||
def validate(self, extra_validators=None):
|
||||
# First, run the default validators
|
||||
@@ -31,15 +30,15 @@ class ConditionFormRow(Form):
|
||||
# If any of the operator/field/value is set, then they must be all set
|
||||
if any(value not in ("", False, "None", None) for value in [self.operator.data, self.field.data, self.value.data]):
|
||||
if not self.operator.data or self.operator.data == 'None':
|
||||
self.operator.errors.append(_l("Operator is required."))
|
||||
self.operator.errors.append("Operator is required.")
|
||||
return False
|
||||
|
||||
if not self.field.data or self.field.data == 'None':
|
||||
self.field.errors.append(_l("Field is required."))
|
||||
self.field.errors.append("Field is required.")
|
||||
return False
|
||||
|
||||
if not self.value.data:
|
||||
self.value.errors.append(_l("Value is required."))
|
||||
self.value.errors.append("Value is required.")
|
||||
return False
|
||||
|
||||
return True # Only return True if all conditions pass
|
||||
@@ -4,7 +4,6 @@ Provides metrics for measuring text similarity between snapshots.
|
||||
"""
|
||||
import pluggy
|
||||
from loguru import logger
|
||||
from flask_babel import gettext as _, lazy_gettext as _l
|
||||
|
||||
LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000
|
||||
|
||||
@@ -54,8 +53,8 @@ def register_operator_choices():
|
||||
@conditions_hookimpl
|
||||
def register_field_choices():
|
||||
return [
|
||||
("levenshtein_ratio", _l("Levenshtein - Text similarity ratio")),
|
||||
("levenshtein_distance", _l("Levenshtein - Text change distance")),
|
||||
("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
|
||||
("levenshtein_distance", "Levenshtein - Text change distance"),
|
||||
]
|
||||
|
||||
@conditions_hookimpl
|
||||
@@ -78,7 +77,7 @@ def ui_edit_stats_extras(watch):
|
||||
"""Add Levenshtein stats to the UI using the global plugin system"""
|
||||
"""Generate the HTML for Levenshtein stats - shared by both plugin systems"""
|
||||
if len(watch.history.keys()) < 2:
|
||||
return f"<p>{_('Not enough history to calculate Levenshtein metrics')}</p>"
|
||||
return "<p>Not enough history to calculate Levenshtein metrics</p>"
|
||||
|
||||
|
||||
# Protection against the algorithm getting stuck on huge documents
|
||||
@@ -88,37 +87,37 @@ def ui_edit_stats_extras(watch):
|
||||
for idx in (-1, -2)
|
||||
if len(k) >= abs(idx)
|
||||
):
|
||||
return f"<p>{_('Snapshot too large for edit statistics, skipping.')}</p>"
|
||||
return "<p>Snapshot too large for edit statistics, skipping.</p>"
|
||||
|
||||
try:
|
||||
lev_data = levenshtein_ratio_recent_history(watch)
|
||||
if not lev_data or not isinstance(lev_data, dict):
|
||||
return f"<p>{_('Unable to calculate Levenshtein metrics')}</p>"
|
||||
|
||||
return "<p>Unable to calculate Levenshtein metrics</p>"
|
||||
|
||||
html = f"""
|
||||
<div class="levenshtein-stats">
|
||||
<h4>{_('Levenshtein Text Similarity Details')}</h4>
|
||||
<h4>Levenshtein Text Similarity Details</h4>
|
||||
<table class="pure-table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>{_('Raw distance (edits needed)')}</td>
|
||||
<td>Raw distance (edits needed)</td>
|
||||
<td>{lev_data['distance']}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{_('Similarity ratio')}</td>
|
||||
<td>Similarity ratio</td>
|
||||
<td>{lev_data['ratio']:.4f}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{_('Percent similar')}</td>
|
||||
<td>Percent similar</td>
|
||||
<td>{lev_data['percent_similar']}%</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p style="font-size: 80%;">{_('Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.')}</p>
|
||||
<p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
|
||||
</div>
|
||||
"""
|
||||
return html
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
|
||||
return f"<p>{_('Error calculating Levenshtein metrics')}</p>"
|
||||
return "<p>Error calculating Levenshtein metrics</p>"
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ Provides word count metrics for snapshot content.
|
||||
"""
|
||||
import pluggy
|
||||
from loguru import logger
|
||||
from flask_babel import gettext as _, lazy_gettext as _l
|
||||
|
||||
# Support both plugin systems
|
||||
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
|
||||
@@ -41,7 +40,7 @@ def register_operator_choices():
|
||||
def register_field_choices():
|
||||
# Add a field that will be available in conditions
|
||||
return [
|
||||
("word_count", _l("Word count of content")),
|
||||
("word_count", "Word count of content"),
|
||||
]
|
||||
|
||||
@conditions_hookimpl
|
||||
@@ -62,16 +61,16 @@ def _generate_stats_html(watch):
|
||||
|
||||
html = f"""
|
||||
<div class="word-count-stats">
|
||||
<h4>{_('Content Analysis')}</h4>
|
||||
<h4>Content Analysis</h4>
|
||||
<table class="pure-table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>{_('Word count (latest snapshot)')}</td>
|
||||
<td>Word count (latest snapshot)</td>
|
||||
<td>{word_count}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p style="font-size: 80%;">{_('Word count is a simple measure of content length, calculated by splitting text on whitespace.')}</p>
|
||||
<p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
|
||||
</div>
|
||||
"""
|
||||
return html
|
||||
|
||||
@@ -49,9 +49,6 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
|
||||
@@ -75,9 +75,6 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
@@ -89,8 +86,8 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
"""(y) => {
|
||||
const el = document.scrollingElement;
|
||||
if (el) el.scrollTop = y;
|
||||
document.documentElement.scrollTop = y;
|
||||
document.body.scrollTop = y;
|
||||
}""",
|
||||
y
|
||||
)
|
||||
@@ -308,8 +305,6 @@ class fetcher(Fetcher):
|
||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||
finally:
|
||||
self.browser = None
|
||||
raise
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
@@ -537,14 +532,6 @@ class fetcher(Fetcher):
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
finally:
|
||||
# Internal cleanup on any exception/timeout - call quit() immediately
|
||||
# This prevents connection leaks during exception bursts
|
||||
# Worker.py's quit() call becomes a redundant safety net (idempotent)
|
||||
try:
|
||||
await self.quit(watch={'uuid': watch_uuid} if watch_uuid else None)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Error during internal quit() cleanup: {cleanup_error}")
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
from flask_babel import lazy_gettext as _l
|
||||
from loguru import logger
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
@@ -9,12 +7,11 @@ import asyncio
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
|
||||
|
||||
# "html_requests" is listed as the default fetcher in store.py!
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = _l("Basic fast Plaintext/HTTP Client")
|
||||
fetcher_description = "Basic fast Plaintext/HTTP Client"
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
@@ -82,48 +79,14 @@ class fetcher(Fetcher):
|
||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||
from requests_file import FileAdapter
|
||||
session.mount('file://', FileAdapter())
|
||||
|
||||
allow_iana_restricted = strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false'))
|
||||
|
||||
try:
|
||||
# Fresh DNS check at fetch time — catches DNS rebinding regardless of add-time cache.
|
||||
if not allow_iana_restricted:
|
||||
parsed_initial = urlparse(url)
|
||||
if parsed_initial.hostname and is_private_hostname(parsed_initial.hostname):
|
||||
raise Exception(f"Fetch blocked: '{url}' resolves to a private/reserved IP address. "
|
||||
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow.")
|
||||
|
||||
r = session.request(method=request_method,
|
||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||
url=url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
allow_redirects=False)
|
||||
|
||||
# Manually follow redirects so each hop's resolved IP can be validated,
|
||||
# preventing SSRF via an open redirect on a public host.
|
||||
current_url = url
|
||||
for _ in range(10):
|
||||
if not r.is_redirect:
|
||||
break
|
||||
location = r.headers.get('Location', '')
|
||||
redirect_url = urljoin(current_url, location)
|
||||
if not allow_iana_restricted:
|
||||
parsed_redirect = urlparse(redirect_url)
|
||||
if parsed_redirect.hostname and is_private_hostname(parsed_redirect.hostname):
|
||||
raise Exception(f"Redirect blocked: '{redirect_url}' resolves to a private/reserved IP address.")
|
||||
current_url = redirect_url
|
||||
r = session.request('GET', redirect_url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
allow_redirects=False)
|
||||
else:
|
||||
raise Exception("Too many redirects")
|
||||
|
||||
verify=False)
|
||||
except Exception as e:
|
||||
msg = str(e)
|
||||
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
||||
@@ -149,32 +112,10 @@ class fetcher(Fetcher):
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# No charset in HTTP header - sniff encoding in priority order matching browsers
|
||||
# (WHATWG encoding sniffing algorithm):
|
||||
# 1. BOM - highest confidence, check before anything else
|
||||
# 2. <meta charset> in first 2kb
|
||||
# 3. chardet statistical detection - last resort
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
boms = [
|
||||
(b'\xef\xbb\xbf', 'utf-8-sig'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
|
||||
if bom_encoding:
|
||||
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
|
||||
r.encoding = bom_encoding
|
||||
else:
|
||||
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
|
||||
if meta_charset_match:
|
||||
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
|
||||
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
|
||||
r.encoding = encoding
|
||||
else:
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
|
||||
@@ -38,39 +38,26 @@
|
||||
if (a.size !== b.size) {
|
||||
return b.size - a.size;
|
||||
}
|
||||
|
||||
|
||||
// Second priority: apple-touch-icon over regular icon
|
||||
const isAppleA = /apple-touch-icon/.test(a.rel);
|
||||
const isAppleB = /apple-touch-icon/.test(b.rel);
|
||||
if (isAppleA && !isAppleB) return -1;
|
||||
if (!isAppleA && isAppleB) return 1;
|
||||
|
||||
|
||||
// Third priority: icons with no size attribute (fallback icons) last
|
||||
const hasNoSizeA = !a.hasSizes;
|
||||
const hasNoSizeB = !b.hasSizes;
|
||||
if (hasNoSizeA && !hasNoSizeB) return 1;
|
||||
if (!hasNoSizeA && hasNoSizeB) return -1;
|
||||
|
||||
|
||||
return 0;
|
||||
});
|
||||
|
||||
const timeoutMs = 2000;
|
||||
// 1 MB — matches the server-side limit in bump_favicon()
|
||||
const MAX_BYTES = 1 * 1024 * 1024;
|
||||
|
||||
for (const icon of icons) {
|
||||
try {
|
||||
// Inline data URI — no network fetch needed, data is already here
|
||||
if (icon.href.startsWith('data:')) {
|
||||
const match = icon.href.match(/^data:([^;]+);base64,([A-Za-z0-9+/=]+)$/);
|
||||
if (!match) continue;
|
||||
const mime_type = match[1];
|
||||
const base64 = match[2];
|
||||
// Rough size check: base64 is ~4/3 the binary size
|
||||
if (base64.length * 0.75 > MAX_BYTES) continue;
|
||||
return { url: icon.href, mime_type, base64 };
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
@@ -87,15 +74,12 @@
|
||||
|
||||
const blob = await resp.blob();
|
||||
|
||||
if (blob.size > MAX_BYTES) continue;
|
||||
|
||||
// Convert blob to base64
|
||||
const reader = new FileReader();
|
||||
return await new Promise(resolve => {
|
||||
reader.onloadend = () => {
|
||||
resolve({
|
||||
url: icon.href,
|
||||
mime_type: blob.type,
|
||||
base64: reader.result.split(",")[1]
|
||||
});
|
||||
};
|
||||
@@ -114,3 +98,4 @@
|
||||
// Auto-execute and return result for page.evaluate()
|
||||
return await window.getFaviconAsBlob();
|
||||
})();
|
||||
|
||||
|
||||
@@ -56,10 +56,6 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
|
||||
if total_height > capture_height:
|
||||
stitched = stitched.crop((0, 0, max_width, capture_height))
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
|
||||
@@ -104,17 +104,15 @@ class fetcher(Fetcher):
|
||||
|
||||
from selenium.webdriver.remote.remote_connection import RemoteConnection
|
||||
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
|
||||
from selenium.webdriver.remote.client_config import ClientConfig
|
||||
from urllib3.util import Timeout
|
||||
driver = None
|
||||
try:
|
||||
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
|
||||
client_config = ClientConfig(
|
||||
remote_server_addr=self.browser_connection_url,
|
||||
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
|
||||
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
|
||||
remote_connection = RemoteConnection(
|
||||
self.browser_connection_url,
|
||||
)
|
||||
remote_connection = RemoteConnection(client_config=client_config)
|
||||
remote_connection.set_timeout(30) # seconds
|
||||
|
||||
# Now create the driver with the RemoteConnection
|
||||
driver = RemoteWebDriver(
|
||||
command_executor=remote_connection,
|
||||
options=options
|
||||
|
||||
@@ -45,38 +45,8 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
# Compiled regex patterns for performance
|
||||
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
# Regexes built from the constants above — no brittle hardcoded strings
|
||||
_EXTRACT_REMOVED_RE = re.compile(
|
||||
re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
|
||||
)
|
||||
_EXTRACT_ADDED_RE = re.compile(
|
||||
re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
|
||||
)
|
||||
|
||||
|
||||
def extract_changed_from(raw_diff: str) -> str:
|
||||
"""Extract only the removed/changed-from fragments from a raw diff string.
|
||||
|
||||
Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
|
||||
not the full surrounding line. Multiple fragments joined with newlines.
|
||||
"""
|
||||
return '\n'.join(next((g for g in m.groups() if g is not None), '') for m in _EXTRACT_REMOVED_RE.finditer(raw_diff))
|
||||
|
||||
|
||||
def extract_changed_to(raw_diff: str) -> str:
|
||||
"""Extract only the added/changed-into fragments from a raw diff string.
|
||||
|
||||
Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
|
||||
not the full surrounding line. Multiple fragments joined with newlines.
|
||||
"""
|
||||
return '\n'.join(next((g for g in m.groups() if g is not None), '') for m in _EXTRACT_ADDED_RE.finditer(raw_diff))
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html', include_change_type_prefix: bool = True) -> tuple[str, bool]:
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
|
||||
"""
|
||||
Render word-level differences between two lines inline using diff-match-patch library.
|
||||
|
||||
@@ -163,20 +133,14 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
|
||||
if removed_tokens:
|
||||
removed_full = ''.join(removed_tokens).rstrip()
|
||||
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
|
||||
if include_change_type_prefix:
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
else:
|
||||
result_parts.append(f'{removed_full}{trailing_removed}')
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
|
||||
if added_tokens:
|
||||
if result_parts: # Add newline between removed and added
|
||||
result_parts.append('\n')
|
||||
added_full = ''.join(added_tokens).rstrip()
|
||||
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
|
||||
if include_change_type_prefix:
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
else:
|
||||
result_parts.append(f'{added_full}{trailing_added}')
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
else:
|
||||
@@ -186,27 +150,21 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
|
||||
if op == 0: # Equal
|
||||
result_parts.append(text)
|
||||
elif op == 1: # Insertion
|
||||
if not include_change_type_prefix:
|
||||
result_parts.append(text)
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
result_parts.append(trailing)
|
||||
elif op == -1: # Deletion
|
||||
if not include_change_type_prefix:
|
||||
result_parts.append(text)
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
result_parts.append(trailing)
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
|
||||
@@ -402,7 +360,7 @@ def customSequenceMatcher(
|
||||
|
||||
# Use inline word-level diff for single line replacements when word_diff is enabled
|
||||
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer, include_change_type_prefix=include_change_type_prefix)
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
|
||||
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
|
||||
if ignore_junk and not has_changes:
|
||||
# No real changes, skip this line
|
||||
@@ -457,8 +415,8 @@ def render_diff(
|
||||
Returns:
|
||||
str: Rendered difference
|
||||
"""
|
||||
newest_lines = [line.rstrip() for line in (newest_version_file_contents or '').splitlines()]
|
||||
previous_lines = [line.rstrip() for line in (previous_version_file_contents or '').splitlines()]
|
||||
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
|
||||
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
|
||||
now = time.time()
|
||||
logger.debug(
|
||||
f"diff options: "
|
||||
|
||||
@@ -4,7 +4,6 @@ import flask_login
|
||||
import locale
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@@ -28,6 +27,7 @@ from flask import (
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
from flask_restful import abort, Api
|
||||
from flask_cors import CORS
|
||||
|
||||
@@ -40,7 +40,7 @@ from loguru import logger
|
||||
|
||||
from changedetectionio import __version__
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
@@ -69,43 +69,19 @@ socketio_server = None
|
||||
|
||||
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
||||
CORS(app)
|
||||
from werkzeug.routing import BaseConverter, ValidationError
|
||||
from uuid import UUID
|
||||
|
||||
class StrictUUIDConverter(BaseConverter):
|
||||
# Special sentinel values allowed in addition to strict UUIDs
|
||||
_ALLOWED_SENTINELS = frozenset({'first'})
|
||||
|
||||
def to_python(self, value: str) -> str:
|
||||
if value in self._ALLOWED_SENTINELS:
|
||||
return value
|
||||
try:
|
||||
u = UUID(value)
|
||||
except ValueError as e:
|
||||
raise ValidationError() from e
|
||||
# Reject non-standard formats (braces, URNs, no-hyphens)
|
||||
if str(u) != value.lower():
|
||||
raise ValidationError()
|
||||
return str(u)
|
||||
|
||||
def to_url(self, value) -> str:
|
||||
return str(value)
|
||||
|
||||
# app setup (once)
|
||||
app.url_map.converters["uuid_str"] = StrictUUIDConverter
|
||||
|
||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak.
|
||||
# There's also a bug between flask compress and socketio that causes some kind of slow memory leak
|
||||
# It's better to use compression on your reverse proxy (nginx etc) instead.
|
||||
if strtobool(os.getenv("FLASK_ENABLE_COMPRESSION")):
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||
compress = FlaskCompress()
|
||||
compress.init_app(app)
|
||||
|
||||
compress = FlaskCompress()
|
||||
|
||||
compress.init_app(app)
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||
|
||||
|
||||
@@ -212,23 +188,14 @@ def _is_safe_valid_url(test_url):
|
||||
from .validate_url import is_safe_valid_url
|
||||
return is_safe_valid_url(test_url)
|
||||
|
||||
@app.template_global('get_html_head_extras')
|
||||
def _get_html_head_extras():
|
||||
from .pluggy_interface import collect_html_head_extras
|
||||
return collect_html_head_extras()
|
||||
|
||||
|
||||
@app.template_filter('format_number_locale')
|
||||
def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||
# Format the number with two decimal places (locale format string will return 6 decimal)
|
||||
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
||||
return formatted_value
|
||||
|
||||
@app.template_filter('regex_search')
|
||||
def _jinja2_filter_regex_search(value, pattern):
|
||||
import re
|
||||
return re.search(pattern, str(value)) is not None
|
||||
return formatted_value
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
@@ -393,8 +360,6 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
|
||||
return ''
|
||||
|
||||
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||
|
||||
@app.template_filter('sanitize_tag_class')
|
||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
"""Sanitize a tag title to create a valid CSS class name.
|
||||
@@ -406,8 +371,9 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
Returns:
|
||||
str: A sanitized string suitable for use as a CSS class name
|
||||
"""
|
||||
import re
|
||||
# Remove all non-alphanumeric characters and convert to lowercase
|
||||
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
||||
# Ensure it starts with a letter (CSS requirement)
|
||||
if sanitized and not sanitized[0].isalpha():
|
||||
sanitized = 'tag' + sanitized
|
||||
@@ -495,21 +461,28 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
_locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||
|
||||
def get_locale():
|
||||
# Locale aliases: map browser language codes to translation directory names
|
||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
||||
locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
return session['locale']
|
||||
|
||||
# 2. Fall back to Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||
# 3. Map browser locale to our internal locale if needed
|
||||
return _locale_aliases.get(browser_locale, browser_locale)
|
||||
# Get the best match from browser's Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
||||
|
||||
# 3. Check if we need to map the browser locale to our internal locale
|
||||
if browser_locale in locale_aliases:
|
||||
return locale_aliases[browser_locale]
|
||||
|
||||
return browser_locale
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -561,22 +534,22 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
|
||||
watch_api.add_resource(WatchHistoryDiff,
|
||||
'/api/v1/watch/<uuid_str:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchSingleHistory,
|
||||
'/api/v1/watch/<uuid_str:uuid>/history/<string:timestamp>',
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
watch_api.add_resource(WatchFavicon,
|
||||
'/api/v1/watch/<uuid_str:uuid>/favicon',
|
||||
'/api/v1/watch/<string:uuid>/favicon',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchHistory,
|
||||
'/api/v1/watch/<uuid_str:uuid>/history',
|
||||
'/api/v1/watch/<string:uuid>/history',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(CreateWatch, '/api/v1/watch',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Watch, '/api/v1/watch/<uuid_str:uuid>',
|
||||
watch_api.add_resource(Watch, '/api/v1/watch/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(SystemInfo, '/api/v1/systeminfo',
|
||||
@@ -589,7 +562,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Tags, '/api/v1/tags',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<uuid_str:uuid>',
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Search, '/api/v1/search',
|
||||
@@ -598,8 +571,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Notifications, '/api/v1/notifications',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Spec, '/api/v1/full-spec')
|
||||
|
||||
@login_manager.user_loader
|
||||
def user_loader(email):
|
||||
user = User()
|
||||
@@ -1022,16 +993,15 @@ def check_for_new_version():
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
session = requests.Session()
|
||||
session.verify = False
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
r = session.post("https://changedetection.io/check-ver.php",
|
||||
r = requests.post("https://changedetection.io/check-ver.php",
|
||||
data={'version': __version__,
|
||||
'app_guid': datastore.data['app_guid'],
|
||||
'watch_count': len(datastore.data['watching'])
|
||||
})
|
||||
},
|
||||
|
||||
verify=False)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
+13
-146
@@ -5,8 +5,6 @@ from wtforms.widgets.core import TimeInput
|
||||
from flask_babel import lazy_gettext as _l, gettext
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
|
||||
from changedetectionio.llm.ui_strings import LLM_INTENT_WATCH_PLACEHOLDER
|
||||
from changedetectionio.llm.evaluator import DEFAULT_CHANGE_SUMMARY_PROMPT, LLM_DEFAULT_MAX_SUMMARY_TOKENS, LLM_DEFAULT_THINKING_BUDGET
|
||||
from changedetectionio.conditions.form import ConditionFormRow
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
from changedetectionio.strtobool import strtobool
|
||||
@@ -18,7 +16,6 @@ from wtforms import (
|
||||
Field,
|
||||
FloatField,
|
||||
IntegerField,
|
||||
PasswordField,
|
||||
RadioField,
|
||||
SelectField,
|
||||
StringField,
|
||||
@@ -611,12 +608,13 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
from changedetectionio.html_tools import SafeXPath3Parser
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = line.replace('xpath:', '')
|
||||
|
||||
try:
|
||||
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||
except elementpath.ElementPathError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
@@ -670,11 +668,9 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
# `jq` requires full compilation in windows and so isn't generally available
|
||||
raise ValidationError("jq not support not found")
|
||||
|
||||
from changedetectionio.html_tools import validate_jq_expression
|
||||
input = line.replace('jq:', '')
|
||||
|
||||
try:
|
||||
validate_jq_expression(input)
|
||||
jq.compile(input)
|
||||
except (ValueError) as e:
|
||||
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
|
||||
@@ -728,7 +724,7 @@ class ValidateStartsWithRegex(object):
|
||||
raise ValidationError(self.message or _l("Invalid value."))
|
||||
|
||||
class quickWatchForm(Form):
|
||||
url = StringField(_l('URL'), validators=[validateURL()])
|
||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
@@ -774,16 +770,16 @@ class SingleBrowserStep(Form):
|
||||
operation = SelectField(_l('Operation'), [validators.Optional()], choices=browser_step_ui_config.keys())
|
||||
|
||||
# maybe better to set some <script>var..
|
||||
selector = StringField(_l('Selector'), [validators.Optional()], render_kw={"placeholder": _l("CSS or xPath selector")})
|
||||
optional_value = StringField(_l('value'), [validators.Optional()], render_kw={"placeholder": _l("Value")})
|
||||
selector = StringField(_l('Selector'), [validators.Optional()], render_kw={"placeholder": "CSS or xPath selector"})
|
||||
optional_value = StringField(_l('value'), [validators.Optional()], render_kw={"placeholder": "Value"})
|
||||
# @todo move to JS? ajax fetch new field?
|
||||
# remove_button = SubmitField(_l('-'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
|
||||
# add_button = SubmitField(_l('+'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})
|
||||
|
||||
class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
url = StringField(_l('Web Page URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group Tag'), [validators.Optional()], default='')
|
||||
url = fields.URLField('Web Page URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group Tag', [validators.Optional()], default='')
|
||||
|
||||
time_between_check = EnhancedFormField(
|
||||
TimeBetweenCheckForm,
|
||||
@@ -797,18 +793,10 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
time_between_check_use_default = BooleanField(_l('Use global settings for time between check and scheduler.'), default=False)
|
||||
|
||||
llm_intent = TextAreaField(_l('AI Change Intent'), validators=[validators.Optional(), validators.Length(max=2000)],
|
||||
render_kw={"rows": "5", "placeholder": LLM_INTENT_WATCH_PLACEHOLDER})
|
||||
|
||||
llm_change_summary = TextAreaField(_l('AI Change Summary'), validators=[validators.Optional(), validators.Length(max=2000)],
|
||||
render_kw={"rows": "5", "placeholder": DEFAULT_CHANGE_SUMMARY_PROMPT},
|
||||
default='')
|
||||
|
||||
include_filters = StringListField(_l('CSS/JSONPath/JQ/XPath Filters'), [ValidateCSSJSONXPATHInput()], default='')
|
||||
|
||||
subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
|
||||
extract_lines_containing = StringListField(_l('Extract lines containing'), [validators.Optional()])
|
||||
extract_text = StringListField(_l('Extract text'), [ValidateListRegex()])
|
||||
|
||||
title = StringField(_l('Title'), default='')
|
||||
@@ -928,7 +916,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
class SingleExtraProxy(Form):
|
||||
# maybe better to set some <script>var..
|
||||
proxy_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": _l("Name")})
|
||||
proxy_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
proxy_url = StringField(_l('Proxy URL'), [
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
@@ -940,7 +928,7 @@ class SingleExtraProxy(Form):
|
||||
], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
|
||||
|
||||
class SingleExtraBrowser(Form):
|
||||
browser_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": _l("Name")})
|
||||
browser_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
browser_connection_url = StringField(_l('Browser connection URL'), [
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
@@ -999,7 +987,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
api_access_token_enabled = BooleanField(_l('API access token security check enabled'), default=True, validators=[validators.Optional()])
|
||||
base_url = StringField(_l('Notification base URL override'),
|
||||
validators=[validators.Optional()],
|
||||
render_kw={"placeholder": os.getenv('BASE_URL', _l('Not set'))}
|
||||
render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
|
||||
)
|
||||
empty_pages_are_a_change = BooleanField(_l('Treat empty pages as a change?'), default=False)
|
||||
fetch_backend = RadioField(_l('Fetch Method'), default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
@@ -1009,7 +997,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
|
||||
# Screenshot comparison settings
|
||||
min_change_percentage = FloatField(
|
||||
_l('Screenshot: Minimum Change Percentage'),
|
||||
'Screenshot: Minimum Change Percentage',
|
||||
validators=[
|
||||
validators.Optional(),
|
||||
validators.NumberRange(min=0.0, max=100.0, message=_l('Must be between 0 and 100'))
|
||||
@@ -1018,7 +1006,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
|
||||
)
|
||||
|
||||
password = SaltyPasswordField(_l('Password'), render_kw={"autocomplete": "new-password"})
|
||||
password = SaltyPasswordField(_l('Password'))
|
||||
pager_size = IntegerField(_l('Pager size'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
@@ -1049,126 +1037,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
ui = FormField(globalSettingsApplicationUIForm)
|
||||
|
||||
|
||||
class globalSettingsLLMForm(Form):
|
||||
"""
|
||||
LLM / AI provider settings — stored under datastore['settings']['application']['llm'].
|
||||
|
||||
Uses litellm under the hood, so the model string encodes both the provider and model.
|
||||
No separate provider dropdown needed — litellm routes automatically:
|
||||
gpt-4o-mini → OpenAI
|
||||
claude-3-5-haiku-20251001 → Anthropic
|
||||
ollama/llama3.2 → Ollama (local)
|
||||
openrouter/google/gemma-3-12b-it:free → OpenRouter (free tier)
|
||||
gemini/gemini-2.0-flash → Google Gemini
|
||||
azure/gpt-4o → Azure OpenAI
|
||||
"""
|
||||
llm_model = StringField(
|
||||
_l('Model'),
|
||||
validators=[validators.Optional()],
|
||||
render_kw={"placeholder": "gpt-4o-mini", "style": "width: 24em;"},
|
||||
)
|
||||
llm_api_key = PasswordField(
|
||||
_l('API Key'),
|
||||
validators=[validators.Optional()],
|
||||
render_kw={
|
||||
"placeholder": _l('Leave blank to use LITELLM_API_KEY env var'),
|
||||
"autocomplete": "off",
|
||||
"style": "width: 24em;",
|
||||
},
|
||||
)
|
||||
llm_api_base = StringField(
|
||||
_l('API Base URL'),
|
||||
validators=[validators.Optional()],
|
||||
render_kw={
|
||||
"placeholder": "http://localhost:11434 (Ollama / custom endpoints only)",
|
||||
"style": "width: 24em;",
|
||||
},
|
||||
)
|
||||
llm_change_summary_default = TextAreaField(
|
||||
_l('Default AI Change Summary prompt'),
|
||||
validators=[validators.Optional(), validators.Length(max=2000)],
|
||||
render_kw={
|
||||
"rows": "5",
|
||||
"placeholder": DEFAULT_CHANGE_SUMMARY_PROMPT,
|
||||
"style": "width: 100%; ",
|
||||
},
|
||||
default='',
|
||||
)
|
||||
llm_max_tokens_per_check = IntegerField(
|
||||
_l('Max tokens per check'),
|
||||
validators=[validators.Optional(), validators.NumberRange(min=0)],
|
||||
default=0,
|
||||
render_kw={
|
||||
"placeholder": "0 = unlimited",
|
||||
"style": "width: 8em;",
|
||||
},
|
||||
)
|
||||
llm_max_tokens_cumulative = IntegerField(
|
||||
_l('Max cumulative tokens (per watch)'),
|
||||
validators=[validators.Optional(), validators.NumberRange(min=0)],
|
||||
default=0,
|
||||
render_kw={
|
||||
"placeholder": "0 = unlimited",
|
||||
"style": "width: 8em;",
|
||||
},
|
||||
)
|
||||
llm_token_budget_month = IntegerField(
|
||||
_l('Monthly token budget'),
|
||||
validators=[validators.Optional(), validators.NumberRange(min=0)],
|
||||
default=0,
|
||||
render_kw={"style": "width: 10em;"},
|
||||
)
|
||||
llm_max_input_chars = IntegerField(
|
||||
_l('Max input characters'),
|
||||
validators=[validators.Optional(), validators.NumberRange(min=1)],
|
||||
default=100000,
|
||||
render_kw={
|
||||
"placeholder": "100000",
|
||||
"style": "width: 10em;",
|
||||
},
|
||||
)
|
||||
llm_override_diff_with_summary = BooleanField(
|
||||
_l('Replace {{diff}} notification token with AI summary'),
|
||||
default=True,
|
||||
)
|
||||
llm_restock_use_fallback_extract = BooleanField(
|
||||
_l('Use LLM as a fallback for extracting price and restock info'),
|
||||
default=True,
|
||||
)
|
||||
llm_thinking_budget = SelectField(
|
||||
_l('AI thinking budget (tokens)'),
|
||||
choices=[
|
||||
('0', _l('Off (no thinking)')),
|
||||
('100', '100'),
|
||||
('500', '500'),
|
||||
('2000', '2000'),
|
||||
],
|
||||
default=str(LLM_DEFAULT_THINKING_BUDGET),
|
||||
validators=[validators.Optional()],
|
||||
)
|
||||
llm_max_summary_tokens = SelectField(
|
||||
_l('Max AI summary length (tokens)'),
|
||||
choices=[
|
||||
('500', '500'),
|
||||
('1000', '1000'),
|
||||
('3000', '3000'),
|
||||
('5000', '5000'),
|
||||
('10000', '10000'),
|
||||
('15000', '15000'),
|
||||
],
|
||||
default=str(LLM_DEFAULT_MAX_SUMMARY_TOKENS),
|
||||
validators=[validators.Optional()],
|
||||
)
|
||||
llm_budget_action = RadioField(
|
||||
_l('When monthly token budget is reached'),
|
||||
choices=[
|
||||
('skip_llm', _l('Skip AI summarisation only (watch still checks)')),
|
||||
('skip_check', _l('Skip the watch check entirely')),
|
||||
],
|
||||
default='skip_llm',
|
||||
)
|
||||
|
||||
|
||||
class globalSettingsForm(Form):
|
||||
# Define these as FormFields/"sub forms", this way it matches the JSON storage
|
||||
# datastore.data['settings']['application']..
|
||||
@@ -1181,7 +1049,6 @@ class globalSettingsForm(Form):
|
||||
|
||||
requests = FormField(globalSettingsRequestForm)
|
||||
application = FormField(globalSettingsApplicationForm)
|
||||
llm = FormField(globalSettingsLLMForm)
|
||||
save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
|
||||
+13
-146
@@ -4,7 +4,6 @@ from loguru import logger
|
||||
from typing import List
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
||||
@@ -14,45 +13,6 @@ PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
|
||||
|
||||
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
|
||||
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# jq builtins that can leak sensitive data or cause harm when user-supplied expressions are executed.
|
||||
# env/$ENV reads all process environment variables (passwords, API keys, etc.)
|
||||
# include/import can read arbitrary files from disk
|
||||
# input/inputs reads beyond the supplied JSON data
|
||||
# debug/stderr leaks data to stderr
|
||||
# halt/halt_error terminates the process (DoS)
|
||||
_JQ_BLOCKED_PATTERNS = [
|
||||
(re.compile(r'\benv\b'), 'env (reads environment variables)'),
|
||||
(re.compile(r'\$ENV\b'), '$ENV (reads environment variables)'),
|
||||
(re.compile(r'\binclude\b'), 'include (reads files from disk)'),
|
||||
(re.compile(r'\bimport\b'), 'import (reads files from disk)'),
|
||||
(re.compile(r'\binputs?\b'), 'input/inputs (reads beyond provided data)'),
|
||||
(re.compile(r'\bdebug\b'), 'debug (leaks data to stderr)'),
|
||||
(re.compile(r'\bstderr\b'), 'stderr (leaks data to stderr)'),
|
||||
(re.compile(r'\bhalt(?:_error)?\b'), 'halt/halt_error (terminates the process)'),
|
||||
(re.compile(r'\$__loc__\b'), '$__loc__ (leaks file path information)'),
|
||||
(re.compile(r'\bbuiltins\b'), 'builtins (enumerates available functions)'),
|
||||
(re.compile(r'\bmodulemeta\b'), 'modulemeta (leaks module information)'),
|
||||
(re.compile(r'\$JQ_BUILD_CONFIGURATION\b'), '$JQ_BUILD_CONFIGURATION (leaks build information)'),
|
||||
]
|
||||
|
||||
def validate_jq_expression(expression: str) -> None:
|
||||
"""Raise ValueError if the jq expression uses any dangerous builtin.
|
||||
|
||||
User-supplied jq expressions are executed server-side. Without this check,
|
||||
builtins like `env` expose every process environment variable (SALTED_PASS,
|
||||
proxy credentials, API keys, etc.) as watch output.
|
||||
"""
|
||||
from changedetectionio.strtobool import strtobool
|
||||
if strtobool(os.getenv('JQ_ALLOW_RISKY_EXPRESSIONS', 'false')):
|
||||
return
|
||||
|
||||
for pattern, description in _JQ_BLOCKED_PATTERNS:
|
||||
if pattern.search(expression):
|
||||
msg = f"jq expression uses disallowed builtin: {description}"
|
||||
logger.critical(f"Security: blocked jq expression containing '{description}' - expression: {expression!r}")
|
||||
raise ValueError(msg)
|
||||
|
||||
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
||||
@@ -63,59 +23,6 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
|
||||
'unparsed-text',
|
||||
'unparsed-text-lines',
|
||||
'unparsed-text-available',
|
||||
'doc',
|
||||
'doc-available',
|
||||
'json-doc',
|
||||
'json-doc-available',
|
||||
'collection', # XPath 2.0+: loads XML node collections from arbitrary URIs
|
||||
'uri-collection', # XPath 3.0+: enumerates URIs from resource collections
|
||||
'transform', # XPath 3.1: XSLT transformation (currently raises, block proactively)
|
||||
'load-xquery-module', # XPath 3.1: loads XQuery modules (currently raises, block proactively)
|
||||
'environment-variable',
|
||||
'available-environment-variables',
|
||||
]
|
||||
|
||||
|
||||
def _build_safe_xpath3_parser():
|
||||
"""Return an XPath3Parser subclass with filesystem/environment access functions removed.
|
||||
|
||||
XPath 3.0 includes functions that can read arbitrary files or environment variables:
|
||||
- unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
|
||||
- doc / doc-available (XML fetch from URI)
|
||||
- environment-variable / available-environment-variables (env var leakage)
|
||||
|
||||
Subclassing gives us an independent symbol_table copy (not shared with the parent class),
|
||||
so removing entries here does not affect XPath3Parser itself.
|
||||
|
||||
Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
|
||||
(comma-separated, e.g. "unparsed-text,doc,environment-variable").
|
||||
"""
|
||||
import os
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
class SafeXPath3Parser(XPath3Parser):
|
||||
pass
|
||||
|
||||
env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
|
||||
if env_override is not None:
|
||||
blocked = [f.strip() for f in env_override.split(',') if f.strip()]
|
||||
else:
|
||||
blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
|
||||
|
||||
for _fn in blocked:
|
||||
SafeXPath3Parser.symbol_table.pop(_fn, None)
|
||||
|
||||
return SafeXPath3Parser
|
||||
|
||||
|
||||
# Module-level singleton — built once, reused everywhere.
|
||||
SafeXPath3Parser = _build_safe_xpath3_parser()
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
@@ -276,6 +183,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
tree = None
|
||||
@@ -301,7 +210,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
@@ -326,9 +235,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
# Drop element references before the finally block so tree.clear() can release
|
||||
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
||||
del r
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
@@ -424,16 +330,12 @@ def _parse_json(json_data, json_filter):
|
||||
raise Exception("jq not support not found")
|
||||
|
||||
if json_filter.startswith("jq:"):
|
||||
expr = json_filter.removeprefix("jq:")
|
||||
validate_jq_expression(expr)
|
||||
jq_expression = jq.compile(expr)
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jq:"))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
if json_filter.startswith("jqraw:"):
|
||||
expr = json_filter.removeprefix("jqraw:")
|
||||
validate_jq_expression(expr)
|
||||
jq_expression = jq.compile(expr)
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return '\n'.join(str(item) for item in match)
|
||||
|
||||
@@ -537,25 +439,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||
else:
|
||||
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
||||
# Server may claim application/json but actually return JSONP
|
||||
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
||||
if jsonp_match:
|
||||
try:
|
||||
inner = jsonp_match.group(1).strip()
|
||||
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
||||
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter )
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
@@ -671,33 +561,10 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
|
||||
)
|
||||
else:
|
||||
parser_config = None
|
||||
|
||||
if is_rss:
|
||||
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
||||
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
||||
else:
|
||||
# Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into <head>,
|
||||
# causing inscriptis to silently give up. Regex-based stripping is unsafe because tags
|
||||
# can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>),
|
||||
# causing the regex to scan past the intended close and eat real page content.
|
||||
from bs4 import BeautifulSoup
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
# Strip tags that inscriptis cannot render as meaningful text and which can be very large.
|
||||
# svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers.
|
||||
# video/audio/picture are kept — they may contain meaningful fallback text or captions.
|
||||
for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg',
|
||||
'math', 'canvas', 'iframe', 'template']):
|
||||
tag.decompose()
|
||||
|
||||
# SPAs often use <body style="display:none"> to hide content until JS loads.
|
||||
# inscriptis respects CSS display rules, so strip hiding styles from the body tag.
|
||||
body_tag = soup.find('body')
|
||||
if body_tag and body_tag.get('style'):
|
||||
style = body_tag['style']
|
||||
if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE):
|
||||
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')")
|
||||
del body_tag['style']
|
||||
|
||||
html_content = str(soup)
|
||||
|
||||
text_content = get_text(html_content, config=parser_config)
|
||||
return text_content
|
||||
|
||||
@@ -28,20 +28,17 @@ def get_timeago_locale(flask_locale):
|
||||
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
|
||||
"""
|
||||
locale_map = {
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
# timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'pt_BR': 'pt_BR', # Portuguese (Brasil)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'ja': 'ja', # Japanese
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
}
|
||||
return locale_map.get(flask_locale, flask_locale)
|
||||
|
||||
@@ -55,8 +52,7 @@ LANGUAGE_DATA = {
|
||||
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
|
||||
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
|
||||
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'},
|
||||
'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
@@ -71,7 +67,6 @@ LANGUAGE_DATA = {
|
||||
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
||||
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
||||
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
||||
'uk': {'flag': 'fi fi-ua fis', 'name': 'Українська'},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
# LLM intent-based change evaluation
|
||||
@@ -1,52 +0,0 @@
|
||||
"""
|
||||
BM25-based relevance trimming for large snapshot text.
|
||||
|
||||
When a snapshot is large and no CSS pre-filter has narrowed it down,
|
||||
we use BM25 to select the lines most relevant to the user's intent
|
||||
before sending to the LLM. This keeps the context focused without
|
||||
an arbitrary char truncation.
|
||||
|
||||
Pure functions — no side effects, fully testable.
|
||||
"""
|
||||
|
||||
MAX_CONTEXT_CHARS = 15_000
|
||||
|
||||
|
||||
def trim_to_relevant(text: str, query: str, max_chars: int = MAX_CONTEXT_CHARS) -> str:
|
||||
"""
|
||||
Return the lines from `text` most relevant to `query` up to `max_chars`.
|
||||
If text fits within budget, return it unchanged.
|
||||
Falls back to head-truncation if rank_bm25 is unavailable.
|
||||
"""
|
||||
if not text or not query:
|
||||
return text or ''
|
||||
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
|
||||
lines = [l for l in text.splitlines() if l.strip()]
|
||||
if not lines:
|
||||
return text[:max_chars]
|
||||
|
||||
try:
|
||||
from rank_bm25 import BM25Okapi
|
||||
except ImportError:
|
||||
# rank-bm25 not installed — fall back to simple head truncation
|
||||
return text[:max_chars]
|
||||
|
||||
tokenized = [line.lower().split() for line in lines]
|
||||
bm25 = BM25Okapi(tokenized)
|
||||
scores = bm25.get_scores(query.lower().split())
|
||||
|
||||
ranked = sorted(enumerate(zip(scores, lines)), key=lambda x: x[1][0], reverse=True)
|
||||
|
||||
selected_indices, total = [], 0
|
||||
for idx, (_score, line) in ranked:
|
||||
if total + len(line) + 1 > max_chars:
|
||||
break
|
||||
selected_indices.append(idx)
|
||||
total += len(line) + 1
|
||||
|
||||
# Re-order selected lines to preserve original document order
|
||||
ordered = [lines[i] for i in sorted(selected_indices)]
|
||||
return '\n'.join(ordered)
|
||||
@@ -1,115 +0,0 @@
|
||||
"""
|
||||
Thin wrapper around litellm.completion.
|
||||
Keeps litellm import isolated so the rest of the codebase doesn't depend on it directly,
|
||||
and makes the call easy to mock in tests.
|
||||
"""
|
||||
|
||||
import os
|
||||
from loguru import logger
|
||||
|
||||
# Default output token cap for JSON-returning calls (intent eval, preview, setup).
|
||||
# These return small JSON objects — 400 is enough for a verbose explanation while
|
||||
# still preventing runaway cost. Change summaries pass their own max_tokens via
|
||||
# _summary_max_tokens() and are NOT subject to this cap.
|
||||
_MAX_COMPLETION_TOKENS = 400
|
||||
|
||||
DEFAULT_TIMEOUT = int(os.getenv('LLM_TIMEOUT', 60))
|
||||
DEFAULT_RETRIES = 3
|
||||
|
||||
|
||||
def completion(model: str, messages: list, api_key: str = None,
|
||||
api_base: str = None, timeout: int = DEFAULT_TIMEOUT,
|
||||
max_tokens: int = None, extra_body: dict = None) -> tuple[str, int, int, int]:
|
||||
"""
|
||||
Call the LLM and return (response_text, total_tokens, input_tokens, output_tokens).
|
||||
Retries up to DEFAULT_RETRIES times on timeout or connection errors.
|
||||
Token counts are 0 if the provider doesn't return usage data.
|
||||
Raises on network/auth errors — callers handle gracefully.
|
||||
"""
|
||||
try:
|
||||
import litellm
|
||||
except ImportError:
|
||||
raise RuntimeError("litellm is not installed. Add it to requirements.txt.")
|
||||
|
||||
_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
|
||||
|
||||
kwargs = {
|
||||
'model': model,
|
||||
'messages': messages,
|
||||
'timeout': _timeout,
|
||||
'temperature': 0,
|
||||
'max_tokens': max_tokens if max_tokens is not None else _MAX_COMPLETION_TOKENS,
|
||||
}
|
||||
if api_key:
|
||||
kwargs['api_key'] = api_key
|
||||
if api_base:
|
||||
kwargs['api_base'] = api_base
|
||||
if extra_body:
|
||||
kwargs['extra_body'] = extra_body
|
||||
|
||||
_retryable = (litellm.Timeout, litellm.APIConnectionError)
|
||||
|
||||
for attempt in range(1, DEFAULT_RETRIES + 1):
|
||||
try:
|
||||
response = litellm.completion(**kwargs)
|
||||
choice = response.choices[0]
|
||||
message = choice.message
|
||||
finish = getattr(choice, 'finish_reason', None)
|
||||
|
||||
text = message.content or ''
|
||||
|
||||
if not text:
|
||||
# Some providers (e.g. Gemini) put text in message.parts instead of .content
|
||||
parts = getattr(message, 'parts', None)
|
||||
if parts:
|
||||
text = ''.join(getattr(p, 'text', '') or '' for p in parts).strip()
|
||||
logger.debug(f"LLM client: extracted text from message.parts ({len(parts)} parts) model={model!r}")
|
||||
|
||||
if finish == 'length':
|
||||
logger.warning(
|
||||
f"LLM client: response truncated (finish_reason='length') model={model!r} "
|
||||
f"— increase max_tokens; got {len(text)} chars so far"
|
||||
)
|
||||
|
||||
if not text:
|
||||
logger.warning(
|
||||
f"LLM client: empty content from model={model!r} "
|
||||
f"finish_reason={finish!r} "
|
||||
f"message={message!r}"
|
||||
)
|
||||
|
||||
usage = getattr(response, 'usage', None)
|
||||
input_tokens = int(getattr(usage, 'prompt_tokens', 0) or 0) if usage else 0
|
||||
output_tokens = int(getattr(usage, 'completion_tokens', 0) or 0) if usage else 0
|
||||
total_tokens = int(getattr(usage, 'total_tokens', 0) or 0) if usage else (input_tokens + output_tokens)
|
||||
logger.debug(
|
||||
f"LLM client: model={model!r} finish={finish!r} "
|
||||
f"tokens={total_tokens} (in={input_tokens} out={output_tokens}) "
|
||||
f"text_len={len(text)}"
|
||||
)
|
||||
return text, total_tokens, input_tokens, output_tokens
|
||||
|
||||
except _retryable as e:
|
||||
# litellm formats its Timeout message with None when the provider doesn't
|
||||
# propagate the timeout value — patch the exception args in-place so every
|
||||
# caller that logs str(e) sees the real number.
|
||||
_fix = f'after {_timeout} seconds'
|
||||
try:
|
||||
e.args = tuple(str(a).replace('after None seconds', _fix) for a in e.args)
|
||||
except Exception:
|
||||
pass
|
||||
if attempt < DEFAULT_RETRIES:
|
||||
logger.warning(
|
||||
f"LLM call timed out/connection error (attempt {attempt}/{DEFAULT_RETRIES}), "
|
||||
f"retrying — model={model!r} timeout={_timeout}s error={e}"
|
||||
)
|
||||
continue
|
||||
logger.warning(
|
||||
f"LLM call failed after {DEFAULT_RETRIES} attempts ({_timeout}s timeout) "
|
||||
f"model={model!r} error={e}"
|
||||
)
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"LLM call failed: model={model!r} error={e}")
|
||||
raise
|
||||
@@ -1,611 +0,0 @@
|
||||
"""
|
||||
LLM evaluation orchestration.
|
||||
|
||||
Two public entry points:
|
||||
- run_setup(watch, datastore) — one-time: decide if pre-filter needed
|
||||
- evaluate_change(watch, datastore, diff, current_snapshot) — per-change evaluation
|
||||
|
||||
Intent resolution: watch.llm_intent → first tag with llm_intent → None (no evaluation)
|
||||
Cache: each (intent, diff) pair is evaluated exactly once, result stored in watch.
|
||||
|
||||
Environment variable overrides (take priority over datastore settings):
|
||||
LLM_MODEL — model string (e.g. "gpt-4o-mini", "ollama/llama3.2")
|
||||
LLM_API_KEY — API key for cloud providers
|
||||
LLM_API_BASE — base URL for local/custom endpoints (e.g. http://localhost:11434)
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from loguru import logger
|
||||
|
||||
from . import client as llm_client
|
||||
from .prompt_builder import (
|
||||
build_change_summary_prompt, build_change_summary_system_prompt,
|
||||
build_eval_prompt, build_eval_system_prompt,
|
||||
build_preview_prompt, build_preview_system_prompt,
|
||||
build_setup_prompt, build_setup_system_prompt,
|
||||
)
|
||||
from .response_parser import parse_eval_response, parse_preview_response, parse_setup_response
|
||||
|
||||
_DEFAULT_MAX_INPUT_CHARS = 100_000
|
||||
|
||||
def _get_max_input_chars(datastore) -> int:
|
||||
"""Max input characters to send to the LLM. Resolution: env var → datastore → 100,000.
|
||||
Always returns at least 1 — unlimited is not permitted.
|
||||
"""
|
||||
env_val = os.getenv('LLM_MAX_INPUT_CHARS', '').strip()
|
||||
if env_val.isdigit() and int(env_val) > 0:
|
||||
return int(env_val)
|
||||
cfg = datastore.data.get('settings', {}).get('application', {}).get('llm') or {}
|
||||
stored = cfg.get('max_input_chars')
|
||||
if stored and int(stored) > 0:
|
||||
return int(stored)
|
||||
return _DEFAULT_MAX_INPUT_CHARS
|
||||
|
||||
|
||||
class LLMInputTooLargeError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def _check_input_size(text: str, max_chars: int) -> None:
|
||||
"""Raise LLMInputTooLargeError if text exceeds max_chars."""
|
||||
if len(text) > max_chars:
|
||||
raise LLMInputTooLargeError(
|
||||
f"Change too large for AI summary ({len(text):,} chars, limit {max_chars:,})"
|
||||
)
|
||||
|
||||
|
||||
LLM_DEFAULT_THINKING_BUDGET = 0 # 0 = thinking disabled by default
|
||||
|
||||
def _thinking_extra_body(model: str, budget: int) -> dict | None:
|
||||
"""Return litellm extra_body to control thinking for models that support it.
|
||||
For Gemini 2.5+: passes thinkingConfig with the given budget (0 = disabled).
|
||||
For all other models: returns None (no-op).
|
||||
"""
|
||||
if not model.startswith('gemini/gemini-2.5'):
|
||||
return None
|
||||
return {'generationConfig': {'thinkingConfig': {'thinkingBudget': budget}}}
|
||||
|
||||
|
||||
def _cached_system(text: str, model: str = '') -> dict:
|
||||
"""Wrap a system prompt, adding Anthropic prompt-caching headers only for Anthropic models.
|
||||
Gemini and other providers have their own caching APIs that break when they receive
|
||||
cache_control, so we only apply it where it's supported.
|
||||
"""
|
||||
is_anthropic = model.startswith('claude') or model.startswith('anthropic/')
|
||||
if is_anthropic:
|
||||
return {'role': 'system', 'content': [{'type': 'text', 'text': text, 'cache_control': {'type': 'ephemeral'}}]}
|
||||
return {'role': 'system', 'content': text}
|
||||
|
||||
|
||||
LLM_DEFAULT_MAX_SUMMARY_TOKENS = 3000
|
||||
|
||||
# Default prompt used when the user hasn't configured llm_change_summary
|
||||
DEFAULT_CHANGE_SUMMARY_PROMPT = "Describe in plain English what changed — list what was added or removed as bullet points, including key details for each item. Be careful of content that merely just moved around, you should mention that it moved but dont report that it was added/removed etc. Be considerate of the style content you are summarising the change of, adjust your report accordingly. Do not quote non-English text verbatim; translate and summarise all content into English. Your entire response must be in English."
|
||||
|
||||
|
||||
def _summary_max_tokens(diff: str, max_cap: int = LLM_DEFAULT_MAX_SUMMARY_TOKENS) -> int:
|
||||
"""Scale completion tokens to diff size: floor 400, ~1 token per 4 chars, ceiling max_cap."""
|
||||
return max(400, min(len(diff) // 4, max_cap))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Intent resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def resolve_llm_field(watch, datastore, field: str) -> tuple[str, str]:
|
||||
"""
|
||||
Generic cascade resolver for any LLM per-watch field.
|
||||
Returns (value, source) where source is 'watch' or tag title.
|
||||
Returns ('', '') if not set anywhere.
|
||||
"""
|
||||
value = (watch.get(field) or '').strip()
|
||||
if value:
|
||||
return value, 'watch'
|
||||
|
||||
for tag_uuid in watch.get('tags', []):
|
||||
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
|
||||
if tag:
|
||||
tag_value = (tag.get(field) or '').strip()
|
||||
if tag_value:
|
||||
return tag_value, tag.get('title', 'tag')
|
||||
|
||||
return '', ''
|
||||
|
||||
|
||||
def resolve_intent(watch, datastore) -> tuple[str, str]:
|
||||
"""
|
||||
Return (intent, source) where source is 'watch' or tag title.
|
||||
Returns ('', '') if no intent is configured anywhere.
|
||||
"""
|
||||
intent = (watch.get('llm_intent') or '').strip()
|
||||
if intent:
|
||||
return intent, 'watch'
|
||||
|
||||
for tag_uuid in watch.get('tags', []):
|
||||
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
|
||||
if tag:
|
||||
tag_intent = (tag.get('llm_intent') or '').strip()
|
||||
if tag_intent:
|
||||
return tag_intent, tag.get('title', 'tag')
|
||||
|
||||
return '', ''
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LLM config helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_llm_config(datastore) -> dict | None:
|
||||
"""
|
||||
Return LLM config dict or None if not configured.
|
||||
|
||||
Resolution order (first non-empty model wins):
|
||||
1. Environment variables: LLM_MODEL, LLM_API_KEY, LLM_API_BASE
|
||||
2. Datastore settings (set via UI)
|
||||
"""
|
||||
# 1. Environment variable override
|
||||
env_model = os.getenv('LLM_MODEL', '').strip()
|
||||
if env_model:
|
||||
return {
|
||||
'model': env_model,
|
||||
'api_key': os.getenv('LLM_API_KEY', '').strip(),
|
||||
'api_base': os.getenv('LLM_API_BASE', '').strip(),
|
||||
}
|
||||
|
||||
# 2. Datastore settings
|
||||
cfg = datastore.data['settings']['application'].get('llm') or {}
|
||||
if not cfg.get('model'):
|
||||
return None
|
||||
return cfg
|
||||
|
||||
|
||||
def llm_configured_via_env() -> bool:
|
||||
"""True when LLM config comes from environment variables, not the UI."""
|
||||
return bool(os.getenv('LLM_MODEL', '').strip())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Global monthly token budget
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _get_month_key() -> str:
|
||||
"""Returns 'YYYY-MM' for the current UTC month."""
|
||||
return datetime.now(timezone.utc).strftime("%Y-%m")
|
||||
|
||||
|
||||
def get_global_token_budget_month(datastore=None) -> int:
|
||||
"""
|
||||
Monthly token budget ceiling. Resolution order:
|
||||
1. LLM_TOKEN_BUDGET_MONTH env var (takes priority, makes field read-only in UI)
|
||||
2. datastore settings (set via UI)
|
||||
Returns 0 (no limit) if not set anywhere.
|
||||
"""
|
||||
try:
|
||||
env_val = int(os.getenv('LLM_TOKEN_BUDGET_MONTH', '0'))
|
||||
if env_val > 0:
|
||||
return env_val
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
if datastore is not None:
|
||||
try:
|
||||
stored = datastore.data['settings']['application'].get('llm') or {}
|
||||
val = int(stored.get('token_budget_month') or 0)
|
||||
return max(0, val)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
return 0
|
||||
|
||||
|
||||
def _estimate_cost_usd(model: str, input_tokens: int, output_tokens: int) -> float:
|
||||
"""
|
||||
Return estimated cost in USD using litellm's pricing database.
|
||||
Returns 0.0 for unknown models (local/Ollama/custom endpoints).
|
||||
Never raises — cost estimation is best-effort.
|
||||
"""
|
||||
if not model or (not input_tokens and not output_tokens):
|
||||
return 0.0
|
||||
try:
|
||||
from litellm.cost_calculator import cost_per_token
|
||||
prompt_cost, completion_cost = cost_per_token(
|
||||
model=model,
|
||||
prompt_tokens=input_tokens,
|
||||
completion_tokens=output_tokens,
|
||||
)
|
||||
return float(prompt_cost + completion_cost)
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
|
||||
def accumulate_global_tokens(datastore, tokens: int,
|
||||
input_tokens: int = 0, output_tokens: int = 0,
|
||||
model: str = '') -> None:
|
||||
"""
|
||||
Add *tokens* to both the all-time and this-month global counters.
|
||||
When input_tokens / output_tokens / model are supplied the estimated
|
||||
USD cost is accumulated alongside the token counts.
|
||||
Resets monthly counters automatically on month rollover.
|
||||
|
||||
These counters live at datastore.data['settings']['application']['llm']
|
||||
and are intentionally read-only from the API/form side — they are only
|
||||
ever written here, in a controlled way.
|
||||
"""
|
||||
if tokens <= 0:
|
||||
return
|
||||
|
||||
current_month = _get_month_key()
|
||||
cost = _estimate_cost_usd(model, input_tokens, output_tokens)
|
||||
|
||||
# Work on the live dict in-place (or create a stub if llm key is absent)
|
||||
app_settings = datastore.data['settings']['application']
|
||||
if 'llm' not in app_settings:
|
||||
app_settings['llm'] = {}
|
||||
llm_cfg = app_settings['llm']
|
||||
|
||||
# Month rollover: reset monthly counters
|
||||
if llm_cfg.get('tokens_month_key') != current_month:
|
||||
llm_cfg['tokens_this_month'] = 0
|
||||
llm_cfg['cost_usd_this_month'] = 0.0
|
||||
llm_cfg['tokens_month_key'] = current_month
|
||||
|
||||
llm_cfg['tokens_total_cumulative'] = (llm_cfg.get('tokens_total_cumulative') or 0) + tokens
|
||||
llm_cfg['tokens_this_month'] = (llm_cfg.get('tokens_this_month') or 0) + tokens
|
||||
llm_cfg['cost_usd_total_cumulative'] = (llm_cfg.get('cost_usd_total_cumulative') or 0.0) + cost
|
||||
llm_cfg['cost_usd_this_month'] = (llm_cfg.get('cost_usd_this_month') or 0.0) + cost
|
||||
|
||||
# Persist immediately — token accounting must survive restarts
|
||||
datastore.commit()
|
||||
|
||||
|
||||
def is_global_token_budget_exceeded(datastore) -> bool:
|
||||
"""
|
||||
Returns True when a monthly token budget is configured (via
|
||||
LLM_TOKEN_BUDGET_MONTH) and the current month's usage has reached
|
||||
or exceeded that budget.
|
||||
"""
|
||||
budget = get_global_token_budget_month(datastore)
|
||||
if not budget:
|
||||
return False
|
||||
|
||||
llm_cfg = datastore.data['settings']['application'].get('llm') or {}
|
||||
if llm_cfg.get('tokens_month_key') != _get_month_key():
|
||||
# Counter hasn't been updated yet this month → zero usage
|
||||
return False
|
||||
|
||||
return (llm_cfg.get('tokens_this_month') or 0) >= budget
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# One-time setup: derive pre-filter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_token_budget(watch, cfg, tokens_this_call: int = 0) -> bool:
|
||||
"""
|
||||
Check token budget limits. Returns True if within budget, False if exceeded.
|
||||
Also accumulates tokens_this_call into watch['llm_tokens_used_cumulative'].
|
||||
"""
|
||||
if tokens_this_call > 0:
|
||||
current = watch.get('llm_tokens_used_cumulative') or 0
|
||||
watch['llm_tokens_used_cumulative'] = current + tokens_this_call
|
||||
|
||||
max_per_check = int(cfg.get('max_tokens_per_check') or 0)
|
||||
max_cumulative = int(cfg.get('max_tokens_cumulative') or 0)
|
||||
|
||||
if max_per_check and tokens_this_call > max_per_check:
|
||||
logger.warning(
|
||||
f"LLM token budget exceeded for {watch.get('uuid')}: "
|
||||
f"{tokens_this_call} tokens > per-check limit {max_per_check}"
|
||||
)
|
||||
return False
|
||||
|
||||
if max_cumulative:
|
||||
total = watch.get('llm_tokens_used_cumulative') or 0
|
||||
if total > max_cumulative:
|
||||
logger.warning(
|
||||
f"LLM cumulative token budget exceeded for {watch.get('uuid')}: "
|
||||
f"{total} tokens > limit {max_cumulative}"
|
||||
)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def run_setup(watch, datastore, snapshot_text: str) -> None:
    """
    Ask the LLM whether a CSS pre-filter would improve precision for this intent.
    Stores result in watch['llm_prefilter'] (str selector or None).
    Called once when intent is first set, and again if pre-filter returns zero matches.

    Silently does nothing when no LLM is configured or the watch/tag has no intent.
    On any completion/parsing error the pre-filter is reset to None (fail-soft).
    """
    cfg = get_llm_config(datastore)
    if not cfg:
        return

    intent, _ = resolve_intent(watch, datastore)
    if not intent:
        return

    url = watch.get('url', '')
    system_prompt = build_setup_system_prompt()
    user_prompt = build_setup_prompt(intent, snapshot_text, url=url)

    try:
        # completion() returns (raw_text, total_tokens, ...) — extra tuple
        # members (input/output split) are ignored here.
        raw, tokens, *_ = llm_client.completion(
            model=cfg['model'],
            messages=[
                _cached_system(system_prompt, model=cfg['model']),
                {'role': 'user', 'content': user_prompt},
            ],
            api_key=cfg.get('api_key'),
            api_base=cfg.get('api_base'),
            extra_body=_thinking_extra_body(cfg['model'], int(datastore.data['settings']['application'].get('llm_thinking_budget', LLM_DEFAULT_THINKING_BUDGET) or 0)),
        )
        # Count this call against the per-watch and global monthly budgets.
        _check_token_budget(watch, cfg, tokens)
        accumulate_global_tokens(datastore, tokens, model=cfg['model'])
        result = parse_setup_response(raw)
        # parse_setup_response guarantees selector is None unless a
        # non-positional pre-filter was actually recommended.
        watch['llm_prefilter'] = result['selector']
        logger.debug(f"LLM setup for {watch.get('uuid')}: prefilter={result['selector']} reason={result['reason']}")
    except Exception as e:
        # Setup is best-effort — a failure just means "no pre-filter".
        logger.warning(f"LLM setup call failed for {watch.get('uuid')}: {e}")
        watch['llm_prefilter'] = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AI Change Summary — human-readable description of what changed
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_effective_summary_prompt(watch, datastore) -> str:
    """Resolve the prompt that summarise_change will use.

    Cascade order: watch-level → tag-level (both via resolve_llm_field) →
    global application setting 'llm_change_summary_default' → hardcoded
    DEFAULT_CHANGE_SUMMARY_PROMPT fallback.
    """
    resolved, _source = resolve_llm_field(watch, datastore, 'llm_change_summary')
    if resolved:
        return resolved

    app_settings = datastore.data.get('settings', {}).get('application', {})
    global_default = (app_settings.get('llm_change_summary_default', '') or '').strip()
    return global_default or DEFAULT_CHANGE_SUMMARY_PROMPT
|
||||
|
||||
|
||||
def compute_summary_cache_key(diff_text: str, prompt: str) -> str:
    """Return a stable 16-char hex key for a (diff, prompt) pair.

    Stored alongside the summary file. A NUL byte separates the two fields so
    that e.g. ('ab', 'c') and ('a', 'bc') produce different keys.
    """
    payload = (
        diff_text.encode('utf-8', errors='replace')
        + b'\x00'
        + prompt.encode('utf-8', errors='replace')
    )
    return hashlib.md5(payload).hexdigest()[:16]
|
||||
|
||||
|
||||
def summarise_change(watch, datastore, diff: str, current_snapshot: str = '') -> str:
    """
    Generate a plain-language summary of the change using the watch's
    llm_change_summary prompt (cascades from tag if not set on watch).

    Returns the summary string, or '' on failure or when skipped (no LLM
    configured, empty diff, or global monthly token budget exhausted).

    The result replaces {{ diff }} in notifications so the user gets a
    readable description instead of raw +/- diff lines.
    """
    cfg = get_llm_config(datastore)
    if not cfg:
        return ''

    # Summaries are optional sugar — skip them entirely once the global
    # monthly token budget is spent.
    if is_global_token_budget_exceeded(datastore):
        budget = get_global_token_budget_month(datastore)
        llm_cfg = datastore.data['settings']['application'].get('llm') or {}
        used = llm_cfg.get('tokens_this_month', 0)
        logger.warning(
            f"LLM summarise_change skipped: monthly budget {budget:,} reached "
            f"({used:,} used this month)"
        )
        return ''

    custom_prompt = get_effective_summary_prompt(watch, datastore)
    if not diff.strip():
        return ''

    _check_input_size(diff, _get_max_input_chars(datastore))
    url = watch.get('url', '')
    title = watch.get('page_title') or watch.get('title') or ''

    system_prompt = build_change_summary_system_prompt()
    user_prompt = build_change_summary_prompt(
        diff=diff,
        custom_prompt=custom_prompt,
        current_snapshot=current_snapshot,
        url=url,
        title=title,
    )

    _thinking_budget = int(datastore.data['settings']['application'].get('llm_thinking_budget', LLM_DEFAULT_THINKING_BUDGET) or 0)
    _extra_body = _thinking_extra_body(cfg['model'], _thinking_budget)

    try:
        _resp = llm_client.completion(
            model=cfg['model'],
            messages=[
                _cached_system(system_prompt, model=cfg['model']),
                {'role': 'user', 'content': user_prompt},
            ],
            api_key=cfg.get('api_key'),
            api_base=cfg.get('api_base'),
            max_tokens=_summary_max_tokens(
                diff,
                max_cap=int(datastore.data['settings']['application'].get('llm_max_summary_tokens', LLM_DEFAULT_MAX_SUMMARY_TOKENS) or LLM_DEFAULT_MAX_SUMMARY_TOKENS),
            ),
            extra_body=_extra_body,
        )
        # completion() returns (raw, total_tokens[, input_tokens, output_tokens])
        raw, tokens = _resp[0], _resp[1]
        input_tokens = _resp[2] if len(_resp) > 2 else 0
        output_tokens = _resp[3] if len(_resp) > 3 else 0
        summary = raw.strip()

        # FIX: _check_token_budget() already accumulates `tokens` into
        # watch['llm_tokens_used_cumulative'] — previously the counter was
        # incremented a second time here, double-counting every summary call
        # (evaluate_change/run_setup accumulate exactly once).
        _check_token_budget(watch, cfg, tokens)
        watch['llm_last_tokens_used'] = tokens
        accumulate_global_tokens(datastore, tokens,
                                 input_tokens=input_tokens,
                                 output_tokens=output_tokens,
                                 model=cfg['model'])
        logger.debug(
            f"LLM change summary {watch.get('uuid')}: tokens={tokens} "
            f"summary={summary[:80]}"
        )
        return summary
    except Exception as e:
        # FIX: previously this handler just re-raised, contradicting the
        # documented contract ("'' on failure") and the fail-soft behaviour of
        # preview_extract/evaluate_change. A failed summary must never break
        # the notification pipeline.
        logger.warning(f"LLM change summary failed for {watch.get('uuid')}: {e}")
        return ''
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Live-preview extraction (current content, no diff)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def preview_extract(watch, datastore, content: str) -> dict | None:
    """
    For the live-preview endpoint: extract relevant information from the
    *current* page content according to the watch's intent.

    Unlike evaluate_change (which compares a diff), this asks the LLM to
    directly answer the intent against the current snapshot — giving the user
    immediate feedback like "30 articles listed" or "Price: $149, 25% off".

    Returns {'found': bool, 'answer': str} or None if LLM not configured / no intent.
    Also returns None (after a warning) if the LLM call or parsing fails.
    """
    cfg = get_llm_config(datastore)
    if not cfg:
        return None

    intent, _ = resolve_intent(watch, datastore)
    if not intent or not content.strip():
        return None

    _check_input_size(content, _get_max_input_chars(datastore))
    url = watch.get('url', '')
    title = watch.get('page_title') or watch.get('title') or ''

    system_prompt = build_preview_system_prompt()
    user_prompt = build_preview_prompt(intent, content, url=url, title=title)

    try:
        # completion() returns (raw_text, total_tokens, ...); extra members ignored.
        raw, tokens, *_ = llm_client.completion(
            model=cfg['model'],
            messages=[
                _cached_system(system_prompt, model=cfg['model']),
                {'role': 'user', 'content': user_prompt},
            ],
            api_key=cfg.get('api_key'),
            api_base=cfg.get('api_base'),
            extra_body=_thinking_extra_body(cfg['model'], int(datastore.data['settings']['application'].get('llm_thinking_budget', LLM_DEFAULT_THINKING_BUDGET) or 0)),
        )
        # NOTE(review): unlike evaluate_change, preview usage is counted only
        # against the global monthly total, not the per-watch budget —
        # presumably intentional since previews are user-triggered; confirm.
        accumulate_global_tokens(datastore, tokens, model=cfg['model'])
        result = parse_preview_response(raw)
        logger.debug(
            f"LLM preview {watch.get('uuid')}: found={result['found']} "
            f"tokens={tokens} answer={result['answer'][:80]}"
        )
        return result
    except Exception as e:
        # Fail soft: the preview is advisory UI feedback — never propagate.
        logger.warning(f"LLM preview extraction failed for {watch.get('uuid')}: {e}")
        return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-change evaluation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def evaluate_change(watch, datastore, diff: str, current_snapshot: str = '') -> dict | None:
    """
    Evaluate whether `diff` matches the watch's intent.
    Returns {'important': bool, 'summary': str} or None if LLM not configured / no intent.

    Results are cached by (intent, diff) hash — each unique diff is evaluated exactly once.

    Budget behaviour is deliberately "fail open": when the monthly or
    per-watch token budget is exhausted, or the LLM call fails, the change is
    passed through as important so no notification is ever suppressed by a
    budgeting/availability problem.
    """
    cfg = get_llm_config(datastore)
    if not cfg:
        return None

    intent, source = resolve_intent(watch, datastore)
    if not intent:
        return None

    # An empty diff can never be an important change — skip the LLM entirely.
    if not diff or not diff.strip():
        return {'important': False, 'summary': ''}

    _check_input_size(diff, _get_max_input_chars(datastore))

    # Cache lookup — evaluations are deterministic once cached
    cache_key = hashlib.sha256(f"{intent}||{diff}".encode()).hexdigest()
    cache = watch.get('llm_evaluation_cache') or {}
    if cache_key in cache:
        logger.debug(f"LLM cache hit for {watch.get('uuid')} key={cache_key[:8]}")
        return cache[cache_key]

    # Check global monthly budget before making the call
    if is_global_token_budget_exceeded(datastore):
        budget = get_global_token_budget_month(datastore)
        llm_cfg = datastore.data['settings']['application'].get('llm') or {}
        used = llm_cfg.get('tokens_this_month', 0)
        logger.warning(
            f"LLM evaluate_change skipped for {watch.get('uuid')}: monthly budget {budget:,} reached "
            f"({used:,} used this month) — passing change through as important"
        )
        # Fail open: don't suppress notifications when budget is exhausted
        return {'important': True, 'summary': ''}

    # Check per-watch cumulative budget before making the call
    # (tokens_this_call=0: this only tests the cumulative limit, no accumulation)
    if not _check_token_budget(watch, cfg):
        # Already over budget — fail open (don't suppress notification)
        return {'important': True, 'summary': ''}

    url = watch.get('url', '')
    title = watch.get('page_title') or watch.get('title') or ''

    system_prompt = build_eval_system_prompt()
    user_prompt = build_eval_prompt(
        intent=intent,
        diff=diff,
        current_snapshot=current_snapshot,
        url=url,
        title=title,
    )

    try:
        _resp = llm_client.completion(
            model=cfg['model'],
            messages=[
                _cached_system(system_prompt, model=cfg['model']),
                {'role': 'user', 'content': user_prompt},
            ],
            api_key=cfg.get('api_key'),
            api_base=cfg.get('api_base'),
            extra_body=_thinking_extra_body(cfg['model'], int(datastore.data['settings']['application'].get('llm_thinking_budget', LLM_DEFAULT_THINKING_BUDGET) or 0)),
        )
        # completion() returns (raw, total_tokens[, input_tokens, output_tokens])
        raw, tokens = _resp[0], _resp[1]
        input_tokens = _resp[2] if len(_resp) > 2 else 0
        output_tokens = _resp[3] if len(_resp) > 3 else 0
        result = parse_eval_response(raw)
    except Exception as e:
        logger.warning(f"LLM evaluation failed for {watch.get('uuid')}: {e}")
        # On failure: don't suppress the notification — pass through as important
        watch['llm_last_tokens_used'] = 0
        return {'important': True, 'summary': ''}

    # Accumulate token usage: per-watch limit and global monthly budget
    _check_token_budget(watch, cfg, tokens)
    watch['llm_last_tokens_used'] = tokens
    accumulate_global_tokens(datastore, tokens,
                             input_tokens=input_tokens,
                             output_tokens=output_tokens,
                             model=cfg['model'])

    # Store in cache
    # NOTE(review): the per-watch cache grows without bound (one entry per
    # unique diff) — consider pruning; confirm whether storage size matters here.
    if 'llm_evaluation_cache' not in watch or watch['llm_evaluation_cache'] is None:
        watch['llm_evaluation_cache'] = {}
    watch['llm_evaluation_cache'][cache_key] = result

    logger.debug(
        f"LLM eval {watch.get('uuid')} (intent from {source}): "
        f"important={result['important']} tokens={tokens} summary={result['summary'][:80]}"
    )
    return result
|
||||
@@ -1,212 +0,0 @@
|
||||
"""
|
||||
Prompt construction for LLM evaluation calls.
|
||||
Pure functions — no side effects, fully testable.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from .bm25_trim import trim_to_relevant
|
||||
|
||||
_AGO_RE = re.compile(r'^\d+\s+\w+\s+ago$', re.IGNORECASE)
|
||||
|
||||
SNAPSHOT_CONTEXT_CHARS = 3_000 # current page state excerpt sent alongside the diff
|
||||
|
||||
|
||||
def _annotate_moved_lines(diff_text: str) -> str:
|
||||
"""
|
||||
Pre-process a unified diff to mark lines that appear on both the + and - sides
|
||||
as [MOVED] rather than genuinely added/removed. This prevents the LLM from
|
||||
incorrectly classifying repositioned content as new or deleted.
|
||||
|
||||
Lines are compared after stripping leading +/- and whitespace so that
|
||||
indentation changes don't prevent matching.
|
||||
"""
|
||||
lines = diff_text.splitlines()
|
||||
added_texts = {l[1:].strip().lower() for l in lines if l.startswith('+') and l[1:].strip()}
|
||||
removed_texts = {l[1:].strip().lower() for l in lines if l.startswith('-') and l[1:].strip()}
|
||||
moved_texts = added_texts & removed_texts
|
||||
|
||||
if not moved_texts:
|
||||
return diff_text
|
||||
|
||||
result = []
|
||||
for line in lines:
|
||||
if line.startswith(('+', '-')):
|
||||
bare = line[1:].strip().lower()
|
||||
if bare in moved_texts or _AGO_RE.match(line[1:].strip()):
|
||||
result.append(f'~{line[1:]}') # ~ prefix = moved/reordered/trivial, skip
|
||||
continue
|
||||
result.append(line)
|
||||
return '\n'.join(result)
|
||||
|
||||
|
||||
def build_eval_prompt(intent: str, diff: str, current_snapshot: str = '',
                      url: str = '', title: str = '') -> str:
    """
    Assemble the user message for a diff evaluation call: optional page
    context (URL/title), the intent, an optional relevant excerpt of the
    current snapshot, then the diff itself.
    The system prompt is kept separate (see build_eval_system_prompt).
    """
    sections = []
    if url:
        sections.append(f"URL: {url}")
    if title:
        sections.append(f"Page title: {title}")
    sections.append(f"Intent: {intent}")

    if current_snapshot:
        snippet = trim_to_relevant(current_snapshot, intent, max_chars=SNAPSHOT_CONTEXT_CHARS)
        if snippet:
            sections.append(f"\nCurrent page state (relevant excerpt):\n{snippet}")

    sections.append(f"\nWhat changed (diff):\n{diff}")
    return '\n'.join(sections)
|
||||
|
||||
|
||||
def build_eval_system_prompt() -> str:
    """System prompt for evaluate_change: instructs the model to judge a
    unified diff against the user's intent and reply with strict JSON only."""
    return (
        "You are a precise, reliable website-change evaluator for a monitoring tool.\n"
        "Your job is to read a unified diff and decide whether it matches a user's stated intent.\n"
        "Accuracy is critical — false positives waste the user's attention; false negatives miss what they care about.\n\n"
        "Diff format:\n"
        "- Lines starting with '+' are newly ADDED content\n"
        "- Lines starting with '-' are REMOVED content\n"
        "- Lines starting with ' ' (space) are unchanged context\n\n"
        "Respond with ONLY a JSON object — no markdown, no explanation outside it:\n"
        '{"important": true/false, "summary": "one sentence describing the relevant change, or why it doesn\'t match"}\n\n'
        "Rules:\n"
        "- important=true ONLY when the diff clearly and specifically matches the intent — be strict\n"
        "- Pay close attention to direction: an intent about price drops means removed (-) prices and added (+) lower prices\n"
        "- Empty, trivial, or cosmetic diffs (timestamps, counters, whitespace, navigation) → important=false\n"
        "- If the same text appears in both removed (-) and added (+) lines the content has likely just "
        "shifted or been reordered. Treat pure reordering as important=false unless the intent "
        "explicitly asks about order or position.\n"
        "- Use OR logic when the intent lists multiple triggers — any one matching is sufficient\n"
        "- When uncertain whether a change truly matches, prefer important=false and explain why in the summary\n"
        "- Summary must be in the same language as the intent\n"
        "- If important=false, the summary must clearly explain what changed and why it does not match"
    )
|
||||
|
||||
|
||||
def build_preview_prompt(intent: str, content: str, url: str = '', title: str = '') -> str:
    """
    Assemble the user message for a live-preview extraction call.
    Unlike build_eval_prompt (which analyses a diff), this sends the *current*
    page content (capped at 6000 chars) so the LLM can answer the intent
    directly, letting the user verify it makes sense before saving.
    """
    pieces = [f"URL: {url}"] if url else []
    if title:
        pieces.append(f"Page title: {title}")
    pieces.append(f"Intent / question: {intent}")
    pieces.append(f"\nPage content:\n{content[:6_000]}")
    return '\n'.join(pieces)
|
||||
|
||||
|
||||
def build_preview_system_prompt() -> str:
    """System prompt for preview_extract: answer the intent directly from the
    current page content, replying with strict JSON only."""
    return (
        "You are a precise, detail-oriented web page content analyst for a website monitoring tool.\n"
        "Given the user's intent or question and the current page content, extract and directly answer "
        "what the intent is looking for. Never guess or paraphrase — report only what the page actually contains.\n\n"
        "Respond with ONLY a JSON object — no markdown, no explanation outside it:\n"
        '{"found": true/false, "answer": "concise direct answer or extraction"}\n\n'
        "Rules:\n"
        "- found=true when the page clearly contains something relevant to the intent\n"
        "- answer must directly address the intent with specific values where possible "
        "(e.g. for 'current price?' → '$149.99', not 'a price is shown')\n"
        "- answer must be in the same language as the intent\n"
        "- Keep answer brief — one or two sentences maximum\n"
        "- If found=false, briefly state what the page contains instead"
    )
|
||||
|
||||
|
||||
def build_change_summary_prompt(diff: str, custom_prompt: str,
                                current_snapshot: str = '', url: str = '', title: str = '') -> str:
    """
    Assemble the user message for an AI Change Summary call: optional page
    context (URL/title), the user's own instructions (custom_prompt), an
    optional relevant snapshot excerpt, then the diff with moved/trivial
    lines pre-annotated ('~') by _annotate_moved_lines.
    """
    sections = []
    if url:
        sections.append(f"URL: {url}")
    if title:
        sections.append(f"Page title: {title}")
    sections.append(f"Instructions: {custom_prompt}")

    if current_snapshot:
        snippet = trim_to_relevant(current_snapshot, custom_prompt, max_chars=2_000)
        if snippet:
            sections.append(f"\nCurrent page (excerpt):\n{snippet}")

    sections.append(f"\nWhat changed (diff):\n{_annotate_moved_lines(diff)}")
    return '\n'.join(sections)
|
||||
|
||||
|
||||
def build_change_summary_system_prompt() -> str:
    """System prompt for summarise_change: how to read the pre-annotated diff
    ('~' = moved/trivial) and the fixed Added/Changed/Removed output order."""
    return (
        "You are a meticulous, accurate summariser of website changes for monitoring notifications.\n"
        "Your goal is to describe exactly what changed — never omit significant details, "
        "never add information that isn't in the diff, and never speculate.\n\n"
        "Rules for reading the diff:\n"
        "- Lines starting with + are genuinely new content. List them specifically.\n"
        "- Lines starting with - are genuinely removed content. List them specifically.\n"
        "- Lines starting with ~ have been PRE-IDENTIFIED as moved/reordered or trivial — "
        "the same text exists on both sides of the diff, or the line is a standalone timestamp. "
        "Do NOT report ~ lines as added or removed. "
        "If many ~ lines exist, note briefly that some content was reordered.\n"
        "- Never list standalone timestamps like '3 hours ago', 'Yesterday', '2 minutes ago' "
        "as added or removed items — they are not meaningful content changes.\n"
        "For content-heavy pages (news, listings, feeds): quote or paraphrase the specific new "
        "headlines, items, or entries that were added — do not collapse them into vague phrases "
        "like 'new articles were added' or 'section was expanded'.\n"
        "For large blocks of new text (full articles, documents, long paragraphs): briefly summarise "
        "the substance in 1-2 sentences capturing the key point — do not just repeat the title.\n\n"
        "Structure your response using these sections, in this fixed order — "
        "omit a section entirely if there is nothing to report for it:\n"
        " Added: ...\n"
        " Changed: ...\n"
        " Removed: ...\n"
        "The Removed section MUST always be last. Never place removals before additions or changes.\n\n"
        "Follow the user's formatting instructions exactly for structure, language, and length.\n"
        "Respond with ONLY the summary text — no JSON, no markdown code fences, no preamble. "
        "Just the description."
    )
|
||||
|
||||
|
||||
def build_setup_prompt(intent: str, snapshot_text: str, url: str = '') -> str:
    """
    Assemble the prompt for the one-time setup call that decides whether a
    CSS pre-filter would improve evaluation precision. The snapshot is first
    trimmed to the ~4000 chars most relevant to the intent.
    """
    relevant = trim_to_relevant(snapshot_text, intent, max_chars=4_000)

    sections = [f"URL: {url}"] if url else []
    sections.append(f"Intent: {intent}")
    sections.append(f"\nPage content excerpt:\n{relevant}")
    return '\n'.join(sections)
|
||||
|
||||
|
||||
def build_setup_system_prompt() -> str:
    """System prompt for run_setup: decide whether a semantic (never
    positional) CSS pre-filter would help, replying with strict JSON only."""
    return (
        "You help configure a website change monitor.\n"
        "Given a monitoring intent and a sample of the page content, decide if a CSS pre-filter "
        "would improve evaluation precision by scoping the content to a specific structural section.\n\n"
        "Respond with ONLY a JSON object:\n"
        '{"needs_prefilter": true/false, "selector": "CSS selector or null", "reason": "one sentence"}\n\n'
        "Rules:\n"
        "- Only recommend a pre-filter when the intent references a specific structural section "
        "(e.g. 'footer', 'sidebar', 'nav', 'header', 'main', 'article') OR the page clearly "
        "has high-noise sections unrelated to the intent\n"
        "- Use ONLY semantic element selectors: footer, nav, header, main, article, aside, "
        "or attribute-based like [id*='price'], [class*='sidebar'] — NEVER positional selectors "
        "like div:nth-child(3) or //*[2]\n"
        "- Default to needs_prefilter=false — most intents don't need one\n"
        "- selector must be null when needs_prefilter=false"
    )
|
||||
@@ -1,84 +0,0 @@
|
||||
"""
|
||||
Parse and validate LLM JSON responses.
|
||||
Pure functions — no side effects, fully testable.
|
||||
|
||||
LLMs occasionally return JSON wrapped in markdown fences or with trailing
|
||||
text. This module handles those cases gracefully.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
# Positional selectors are fragile — reject them even if the LLM generates them.
# Matches CSS :nth-child/:nth-of-type/jQuery :eq(), bracketed numeric indices
# like [2], and XPath-style //*[n] paths, anywhere in the selector string.
_POSITIONAL_SELECTOR_RE = re.compile(
    r'nth-child|nth-of-type|:eq\(|\[\d+\]|\/\/\*\[\d',
    re.IGNORECASE
)
|
||||
|
||||
|
||||
def _extract_json(raw: str) -> str:
|
||||
"""Strip markdown fences and extract the first JSON object."""
|
||||
raw = raw.strip()
|
||||
# Remove ```json ... ``` or ``` ... ``` fences
|
||||
raw = re.sub(r'^```(?:json)?\s*', '', raw, flags=re.MULTILINE)
|
||||
raw = re.sub(r'\s*```$', '', raw, flags=re.MULTILINE)
|
||||
# Find the first { ... } block
|
||||
match = re.search(r'\{.*\}', raw, re.DOTALL)
|
||||
return match.group(0) if match else raw
|
||||
|
||||
|
||||
def parse_eval_response(raw: str) -> dict:
    """
    Parse a diff evaluation response into {'important': bool, 'summary': str}.
    Any parse failure (bad JSON, non-dict payload, non-string input) degrades
    to {'important': False, 'summary': ''} so a malformed reply never raises.
    """
    fallback = {'important': False, 'summary': ''}
    try:
        payload = json.loads(_extract_json(raw))
        important = bool(payload.get('important', False))
        summary = str(payload.get('summary', '')).strip()
    except (json.JSONDecodeError, AttributeError):
        return fallback
    return {'important': important, 'summary': summary}
|
||||
|
||||
|
||||
def parse_preview_response(raw: str) -> dict:
    """
    Parse a live-preview extraction response into {'found': bool, 'answer': str}.
    Any parse failure (bad JSON, non-dict payload, non-string input) degrades
    to {'found': False, 'answer': ''} so a malformed reply never raises.
    """
    fallback = {'found': False, 'answer': ''}
    try:
        payload = json.loads(_extract_json(raw))
        found = bool(payload.get('found', False))
        answer = str(payload.get('answer', '')).strip()
    except (json.JSONDecodeError, AttributeError):
        return fallback
    return {'found': found, 'answer': answer}
|
||||
|
||||
|
||||
def parse_setup_response(raw: str) -> dict:
    """
    Parse a setup/pre-filter decision into
    {'needs_prefilter': bool, 'selector': str|None, 'reason': str}.

    Positional selectors (nth-child, :eq(), [2], //*[n]) are rejected outright
    — they are too fragile — which also forces needs_prefilter back to False.
    Parse failures degrade to the all-off default instead of raising.
    """
    try:
        payload = json.loads(_extract_json(raw))
        wants_filter = bool(payload.get('needs_prefilter', False))
        selector = payload.get('selector') or None

        # Sanitise: discard the whole recommendation on a positional selector
        if selector and _POSITIONAL_SELECTOR_RE.search(selector):
            wants_filter = False
            selector = None

        return {
            'needs_prefilter': wants_filter,
            'selector': selector if wants_filter else None,
            'reason': str(payload.get('reason', '')).strip(),
        }
    except (json.JSONDecodeError, AttributeError):
        return {'needs_prefilter': False, 'selector': None, 'reason': ''}
|
||||
@@ -1,18 +0,0 @@
|
||||
"""
|
||||
Shared UI placeholder strings for LLM fields.
|
||||
|
||||
Used by WTForms field definitions in forms.py and blueprint/tags/form.py.
|
||||
Templates use their own _()-translated variants but should stay in sync with these.
|
||||
"""
|
||||
|
||||
# llm_intent field — placeholder text for per-watch context.
# NOTE: templates carry their own _()-translated variants of these strings;
# keep them in sync when editing (see module docstring).
LLM_INTENT_WATCH_PLACEHOLDER = (
    "e.g. Alert me when the price drops below $300, or a new product is launched. "
    "Ignore footer and navigation changes."
)

# llm_intent field — placeholder text for tag/group context
LLM_INTENT_TAG_PLACEHOLDER = (
    "e.g. Flag price changes or new product launches across all watches in this group"
)
|
||||
|
||||
@@ -2,8 +2,6 @@ from os import getenv
|
||||
from copy import deepcopy
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
|
||||
from changedetectionio.llm.evaluator import LLM_DEFAULT_MAX_SUMMARY_TOKENS, LLM_DEFAULT_THINKING_BUDGET
|
||||
from changedetectionio.model.Tags import TagsDict
|
||||
|
||||
from changedetectionio.notification import (
|
||||
default_notification_body,
|
||||
@@ -70,9 +68,7 @@ class model(dict):
|
||||
'schema_version' : 0,
|
||||
'shared_diff_access': False,
|
||||
'strip_ignored_lines': False,
|
||||
'tags': None, # Initialized in __init__ with real datastore_path
|
||||
'llm_thinking_budget': LLM_DEFAULT_THINKING_BUDGET,
|
||||
'llm_max_summary_tokens': LLM_DEFAULT_MAX_SUMMARY_TOKENS,
|
||||
'tags': {}, #@todo use Tag.model initialisers
|
||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||
'ui': {
|
||||
'use_page_title_in_list': True,
|
||||
@@ -84,16 +80,10 @@ class model(dict):
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, *arg, datastore_path=None, **kw):
|
||||
def __init__(self, *arg, **kw):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
# Capture any tags data passed in before base_config overwrites the structure
|
||||
existing_tags = self.get('settings', {}).get('application', {}).get('tags') or {}
|
||||
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
|
||||
self.update(deepcopy(self.base_config))
|
||||
# TagsDict requires the real datastore_path at runtime (cannot be set at class-definition time)
|
||||
if datastore_path is None:
|
||||
raise ValueError("App.model() requires 'datastore_path' keyword argument")
|
||||
self['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=datastore_path)
|
||||
|
||||
|
||||
def parse_headers_from_text_file(filepath):
|
||||
|
||||
@@ -46,26 +46,11 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
def matches_url(self, url: str) -> bool:
    """Return True if this tag should be auto-applied to the given watch URL.

    Patterns containing fnmatch wildcards (*, ?, [) are matched with fnmatch;
    any other pattern is a case-insensitive substring match. Returns False
    when no pattern is configured or no URL is given.
    """
    import fnmatch

    pattern = self.get('url_match_pattern', '').strip()
    if not pattern or not url:
        return False

    haystack = url.lower()
    needle = pattern.lower()
    if '*' in pattern or '?' in pattern or '[' in pattern:
        return fnmatch.fnmatch(haystack, needle)
    return needle in haystack
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() and _get_commit_data() methods inherited from watch_base
|
||||
# Tag uses default _get_commit_data() (includes all keys)
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
_SENTINEL = object()  # distinguishes "no default supplied" from pop(key, None)


class TagsDict(dict):
    """Dict subclass that removes the corresponding tag.json file when a tag is deleted."""

    def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
        # Directory that contains one sub-directory per tag key, each holding a tag.json.
        self._datastore_path = Path(datastore_path)
        super().__init__(*args, **kwargs)

    def __delitem__(self, key: str) -> None:
        """Remove the tag from the dict, then delete its on-disk directory.

        Safety guard: the directory is only removed when it actually contains
        a tag.json, so a bogus key can never delete an unrelated directory.
        """
        super().__delitem__(key)
        tag_dir = self._datastore_path / key
        tag_json_file = tag_dir / "tag.json"
        if not os.path.exists(tag_json_file):
            logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")
            return
        try:
            shutil.rmtree(tag_dir)
            logger.info(f"Deleted tag directory for tag {key!r}")
        except FileNotFoundError:
            # Directory disappeared between the check and rmtree — nothing to do.
            pass
        except OSError as e:
            # Best-effort cleanup: the dict entry is already gone, so just log.
            logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")

    def pop(self, key: str, default=_SENTINEL):
        """Remove and return tag, deleting its tag.json file. Raises KeyError if missing and no default given."""
        if key in self:
            value = self[key]
            # Route through __delitem__ so the on-disk directory is cleaned up too.
            del self[key]
            return value
        if default is _SENTINEL:
            raise KeyError(key)
        return default
|
||||
@@ -43,11 +43,6 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||
_FAVICON_FILENAME_CACHE: dict = {}
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
@@ -388,25 +383,6 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
return self.get('fetch_backend')
|
||||
|
||||
@property
def fetcher_supports_screenshots(self):
    """Return True if the fetcher configured for this watch supports screenshots.

    Resolves 'system' via self._datastore, then checks supports_screenshots on
    the actual fetcher class. Works for built-in and plugin fetchers alike.
    """
    # Local import — presumably to avoid a circular import at module load
    # time; TODO confirm.
    from changedetectionio import content_fetchers

    fetcher_name = self.get_fetch_backend  # already handles is_pdf → html_requests
    if not fetcher_name or fetcher_name == 'system':
        # 'system' defers to the application-wide default fetcher setting
        fetcher_name = self._datastore['settings']['application'].get('fetch_backend', 'html_requests')

    fetcher_class = getattr(content_fetchers, fetcher_name, None)
    if fetcher_class is None:
        # Unknown/unavailable fetcher (e.g. plugin not installed) → assume no screenshots
        return False

    return bool(getattr(fetcher_class, 'supports_screenshots', False))
|
||||
|
||||
@property
|
||||
def is_pdf(self):
|
||||
url = str(self.get("url") or "").lower()
|
||||
@@ -798,50 +774,24 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
# Also in the case that the file didnt exist
|
||||
return True
|
||||
|
||||
def bump_favicon(self, url, favicon_base_64: str, mime_type: str = None) -> None:
|
||||
def bump_favicon(self, url, favicon_base_64: str) -> None:
|
||||
from urllib.parse import urlparse
|
||||
import base64
|
||||
import binascii
|
||||
import re
|
||||
decoded = None
|
||||
|
||||
MAX_FAVICON_BYTES = 1 * 1024 * 1024 # 1 MB
|
||||
|
||||
MIME_TO_EXT = {
|
||||
'image/png': 'png',
|
||||
'image/x-icon': 'ico',
|
||||
'image/vnd.microsoft.icon': 'ico',
|
||||
'image/jpeg': 'jpg',
|
||||
'image/gif': 'gif',
|
||||
'image/svg+xml': 'svg',
|
||||
'image/webp': 'webp',
|
||||
'image/bmp': 'bmp',
|
||||
}
|
||||
|
||||
extension = None
|
||||
|
||||
# If the caller already resolved the MIME type (e.g. from blob.type or a data URI),
|
||||
# use that directly — it's more reliable than guessing from a URL path.
|
||||
if mime_type:
|
||||
extension = MIME_TO_EXT.get(mime_type.lower().split(';')[0].strip(), None)
|
||||
|
||||
# Fall back to extracting extension from URL path, unless it's a data URI.
|
||||
if not extension and url and not url.startswith('data:'):
|
||||
if url:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
filename = os.path.basename(parsed.path)
|
||||
(_base, ext) = filename.lower().strip().rsplit('.', 1)
|
||||
extension = ext
|
||||
(base, extension) = filename.lower().strip().rsplit('.', 1)
|
||||
except ValueError:
|
||||
logger.warning(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}', defaulting to ico")
|
||||
|
||||
# Handle data URIs: extract MIME type from the URI itself when not already known
|
||||
if not extension and url and url.startswith('data:'):
|
||||
m = re.match(r'^data:([^;]+);base64,', url)
|
||||
if m:
|
||||
extension = MIME_TO_EXT.get(m.group(1).lower(), None)
|
||||
|
||||
if not extension:
|
||||
extension = 'ico'
|
||||
logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
|
||||
return None
|
||||
else:
|
||||
# Assume favicon.ico
|
||||
base = "favicon"
|
||||
extension = "ico"
|
||||
|
||||
fname = os.path.join(self.data_dir, f"favicon.{extension}")
|
||||
|
||||
@@ -850,50 +800,58 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
decoded = base64.b64decode(favicon_base_64, validate=True)
|
||||
except (binascii.Error, ValueError) as e:
|
||||
logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
|
||||
return None
|
||||
else:
|
||||
if decoded:
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
if len(decoded) > MAX_FAVICON_BYTES:
|
||||
logger.warning(f"UUID: {self.get('uuid')} Favicon too large ({len(decoded)} bytes), skipping")
|
||||
return None
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=self.get('uuid'))
|
||||
|
||||
# Invalidate module-level favicon filename cache for this watch
|
||||
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=self.get('uuid'))
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
|
||||
|
||||
# @todo - Store some checksum and only write when its different
|
||||
logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")
|
||||
|
||||
def get_favicon_filename(self) -> str | None:
|
||||
"""
|
||||
Find any favicon.* file in the watch data directory.
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||
deepcopy (which drops instance attrs), and concurrent request races.
|
||||
Invalidated by bump_favicon() when a new favicon is saved.
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
|
||||
Returns:
|
||||
str: Basename of the favicon file, or None if not found.
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
"""
|
||||
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||
fname = os.path.basename(files[0]) if files else None
|
||||
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||
return fname
|
||||
|
||||
if not files:
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -1001,38 +959,6 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
return False
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _llm_summary_prompt_hash(prompt: str) -> str:
|
||||
"""8-char hex hash of the prompt — used to detect when the prompt changes."""
|
||||
import hashlib
|
||||
return hashlib.md5(prompt.encode('utf-8', errors='replace')).hexdigest()[:8]
|
||||
|
||||
def get_llm_diff_summary(self, from_version, to_version, prompt: str = '') -> str:
|
||||
"""Return the cached AI Change Summary for this from→to + prompt combination, or ''.
|
||||
|
||||
The prompt hash is embedded in the filename so that a changed prompt
|
||||
automatically produces a cache miss and triggers regeneration.
|
||||
"""
|
||||
prompt_hash = self._llm_summary_prompt_hash(prompt)
|
||||
fname = os.path.join(self.data_dir, f'change-summary-{from_version}-to-{to_version}-{prompt_hash}.txt')
|
||||
if not os.path.isfile(fname):
|
||||
return ''
|
||||
with open(fname, 'r', encoding='utf-8') as f:
|
||||
return f.read().strip()
|
||||
|
||||
def save_llm_diff_summary(self, summary: str, from_version, to_version, prompt: str = ''):
|
||||
"""Persist the AI Change Summary keyed by version pair + prompt hash."""
|
||||
self.ensure_data_dir_exists()
|
||||
prompt_hash = self._llm_summary_prompt_hash(prompt)
|
||||
fname = os.path.join(self.data_dir, f'change-summary-{from_version}-to-{to_version}-{prompt_hash}.txt')
|
||||
tmp = fname + '.tmp'
|
||||
try:
|
||||
with open(tmp, 'w', encoding='utf-8') as f:
|
||||
f.write(summary)
|
||||
os.replace(tmp, fname)
|
||||
except OSError as e:
|
||||
logger.warning(f"Could not write LLM summary cache {fname}: {e}")
|
||||
|
||||
def pause(self):
|
||||
self['paused'] = True
|
||||
|
||||
@@ -1256,13 +1182,18 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def compile_error_texts(self, has_proxies=None):
|
||||
"""Compile error texts for this watch.
|
||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||
from flask import url_for, has_request_context
|
||||
from flask import url_for
|
||||
from markupsafe import Markup
|
||||
|
||||
output = [] # Initialize as list since we're using append
|
||||
last_error = self.get('last_error','')
|
||||
|
||||
has_app_context = has_request_context()
|
||||
try:
|
||||
url_for('settings.settings_page')
|
||||
except Exception as e:
|
||||
has_app_context = False
|
||||
else:
|
||||
has_app_context = True
|
||||
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
|
||||
@@ -186,13 +186,7 @@ class watch_base(dict):
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'content-type': None,
|
||||
'date_created': None,
|
||||
'extract_lines_containing': [], # Keep only lines containing these substrings (plain text, case-insensitive)
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
# LLM intent-based evaluation
|
||||
'llm_intent': '', # Plain-English description of what the user cares about (change filter)
|
||||
'llm_change_summary': '', # Prompt for AI Change Summary — replaces {{ diff }} in notifications
|
||||
'llm_prefilter': None, # CSS selector derived at setup time (semantic only, e.g. "footer")
|
||||
'llm_evaluation_cache': {}, # {sha256(intent+diff): {important, summary}} - evaluated once, cached
|
||||
'fetch_backend': 'system', # plaintext, playwright etc
|
||||
'fetch_time': 0.0,
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
@@ -343,17 +337,8 @@ class watch_base(dict):
|
||||
# These are set by processors/workers and should not trigger edited flag
|
||||
additional_system_fields = {
|
||||
'last_check_status', # Set by processors
|
||||
'last_filter_config_hash', # Set by text_json_diff processor, internal skip-cache
|
||||
'restock', # Set by restock processor
|
||||
'last_viewed', # Set by mark_all_viewed endpoint
|
||||
# LLM runtime fields written back by worker/evaluator
|
||||
'_llm_result',
|
||||
'_llm_intent',
|
||||
'_llm_change_summary',
|
||||
'llm_prefilter',
|
||||
'llm_evaluation_cache',
|
||||
'llm_last_tokens_used',
|
||||
'llm_tokens_used_cumulative',
|
||||
}
|
||||
|
||||
# Only mark as edited if this is a user-writable field
|
||||
|
||||
@@ -48,9 +48,8 @@ To verify this works:
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from urllib.parse import unquote_plus, urlparse
|
||||
from urllib.parse import unquote_plus
|
||||
|
||||
import requests
|
||||
from apprise import plugins
|
||||
@@ -60,8 +59,6 @@ from apprise.utils.logic import dict_full_update
|
||||
from loguru import logger
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
|
||||
SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}
|
||||
|
||||
|
||||
@@ -198,15 +195,6 @@ def apprise_http_custom_handler(
|
||||
|
||||
url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))
|
||||
|
||||
# SSRF protection — block private/loopback addresses unless explicitly allowed
|
||||
if not os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', '').lower() in ('true', '1', 'yes'):
|
||||
hostname = urlparse(url).hostname or ''
|
||||
if hostname and is_private_hostname(hostname):
|
||||
raise ValueError(
|
||||
f"Notification target '{hostname}' is a private/reserved address. "
|
||||
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
|
||||
)
|
||||
|
||||
response = requests.request(
|
||||
method=method,
|
||||
url=url,
|
||||
|
||||
@@ -259,12 +259,9 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
|
||||
or url.startswith('https://discord.com/api'))\
|
||||
and 'html' in requested_output_format:
|
||||
# Discord doesn't render HTML — convert markup to plain text equivalents.
|
||||
# is injected upstream to preserve double-spaces for HTML email clients;
|
||||
# Discord displays it as the literal string " " so strip it here.
|
||||
# Discord doesn't support HTML, replace <br> with newlines
|
||||
n_body = n_body.strip().replace('<br>', '\n')
|
||||
n_body = n_body.replace('</br>', '\n')
|
||||
n_body = n_body.replace(' ', ' ')
|
||||
n_body = newline_re.sub('\n', n_body)
|
||||
|
||||
# Don't replace placeholders or truncate here - let the custom Discord plugin handle it
|
||||
@@ -364,21 +361,6 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
)
|
||||
)
|
||||
|
||||
# {{ raw_diff }} always holds the actual diff regardless of AI Change Summary
|
||||
n_object['raw_diff'] = n_object.get('diff', '')
|
||||
|
||||
# AI Change Summary: optionally replace {{ diff }} with the AI summary
|
||||
_llm_change_summary = (n_object.get('_llm_change_summary') or '').strip()
|
||||
_override_diff = datastore.data['settings']['application'].get('llm_override_diff_with_summary', True)
|
||||
if _llm_change_summary and _override_diff:
|
||||
n_object['diff'] = _llm_change_summary
|
||||
|
||||
# Lazily populate llm_summary / llm_intent if used in notification template
|
||||
scan_text = n_object.get('notification_body', '') + n_object.get('notification_title', '')
|
||||
if 'llm_summary' in scan_text or 'llm_intent' in scan_text or 'raw_diff' in scan_text:
|
||||
n_object['llm_summary'] = _llm_change_summary or (n_object.get('_llm_result') or {}).get('summary', '')
|
||||
n_object['llm_intent'] = n_object.get('_llm_intent', '')
|
||||
|
||||
with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
|
||||
for url in n_object['notification_urls']:
|
||||
|
||||
|
||||
@@ -54,155 +54,34 @@ def _check_cascading_vars(datastore, var_name, watch):
|
||||
return None
|
||||
|
||||
|
||||
class FormattableTimestamp(str):
|
||||
"""
|
||||
A str subclass representing a formatted datetime. As a plain string it renders
|
||||
with the default format, but can also be called with a custom format argument
|
||||
in Jinja2 templates:
|
||||
|
||||
{{ change_datetime }} → '2024-01-15 10:30:00 UTC'
|
||||
{{ change_datetime(format='%Y') }} → '2024'
|
||||
{{ change_datetime(format='%A') }} → 'Monday'
|
||||
{{ change_datetime(format='%Y-%m-%d') }} → '2024-01-15'
|
||||
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
_DEFAULT_FORMAT = '%Y-%m-%d %H:%M:%S %Z'
|
||||
|
||||
def __new__(cls, timestamp):
|
||||
dt = datetime.datetime.fromtimestamp(int(timestamp), tz=pytz.UTC)
|
||||
local_tz = datetime.datetime.now().astimezone().tzinfo
|
||||
dt_local = dt.astimezone(local_tz)
|
||||
try:
|
||||
formatted = dt_local.strftime(cls._DEFAULT_FORMAT)
|
||||
except Exception:
|
||||
formatted = dt_local.isoformat()
|
||||
instance = super().__new__(cls, formatted)
|
||||
instance._dt = dt_local
|
||||
return instance
|
||||
|
||||
def __call__(self, format=_DEFAULT_FORMAT):
|
||||
try:
|
||||
return self._dt.strftime(format)
|
||||
except Exception:
|
||||
return self._dt.isoformat()
|
||||
|
||||
|
||||
class FormattableExtract(str):
|
||||
"""
|
||||
A str subclass that holds only the extracted changed fragments from a diff.
|
||||
Used for {{diff_changed_from}} and {{diff_changed_to}} tokens.
|
||||
|
||||
{{ diff_changed_from }} → old value(s) only, e.g. "$99.99"
|
||||
{{ diff_changed_to }} → new value(s) only, e.g. "$109.99"
|
||||
|
||||
Multiple changed fragments are joined with newlines.
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
|
||||
if prev_snapshot or current_snapshot:
|
||||
from changedetectionio import diff as diff_module
|
||||
# word_diff=True is required — placemarker extraction regexes only exist in word-diff output
|
||||
raw = diff_module.render_diff(prev_snapshot or '', current_snapshot or '', word_diff=True)
|
||||
extracted = extract_fn(raw)
|
||||
else:
|
||||
extracted = ''
|
||||
instance = super().__new__(cls, extracted)
|
||||
return instance
|
||||
|
||||
|
||||
class FormattableDiff(str):
|
||||
"""
|
||||
A str subclass representing a rendered diff. As a plain string it renders
|
||||
with the default options for that variant, but can be called with custom
|
||||
arguments in Jinja2 templates:
|
||||
|
||||
{{ diff }} → default diff output
|
||||
{{ diff(lines=5) }} → truncate to 5 lines
|
||||
{{ diff(added_only=true) }} → only show added lines
|
||||
{{ diff(removed_only=true) }} → only show removed lines
|
||||
{{ diff(context=3) }} → 3 lines of context around changes
|
||||
{{ diff(word_diff=false) }} → line-level diff instead of word-level
|
||||
{{ diff(lines=10, added_only=true) }} → combine args
|
||||
{{ diff_added(lines=5) }} → works on any diff_* variant too
|
||||
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
|
||||
if prev_snapshot or current_snapshot:
|
||||
from changedetectionio import diff as diff_module
|
||||
rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
|
||||
else:
|
||||
rendered = ''
|
||||
instance = super().__new__(cls, rendered)
|
||||
instance._prev = prev_snapshot
|
||||
instance._current = current_snapshot
|
||||
instance._base_kwargs = base_kwargs
|
||||
return instance
|
||||
|
||||
def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
|
||||
word_diff=None, case_insensitive=False, ignore_junk=False):
|
||||
from changedetectionio import diff as diff_module
|
||||
kwargs = dict(self._base_kwargs)
|
||||
|
||||
if added_only:
|
||||
kwargs['include_removed'] = False
|
||||
if removed_only:
|
||||
kwargs['include_added'] = False
|
||||
if context:
|
||||
kwargs['context_lines'] = int(context)
|
||||
if word_diff is not None:
|
||||
kwargs['word_diff'] = bool(word_diff)
|
||||
if case_insensitive:
|
||||
kwargs['case_insensitive'] = True
|
||||
if ignore_junk:
|
||||
kwargs['ignore_junk'] = True
|
||||
|
||||
result = diff_module.render_diff(self._prev or '', self._current or '', **kwargs)
|
||||
|
||||
if lines is not None:
|
||||
result = '\n'.join(result.splitlines()[:int(lines)])
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||
class NotificationContextData(dict):
|
||||
def __init__(self, initial_data=None, **kwargs):
|
||||
# ValidateJinja2Template() validates against the keynames of this dict to check for valid tokens in the body (user submission)
|
||||
super().__init__({
|
||||
'base_url': None,
|
||||
'change_datetime': FormattableTimestamp(time.time()),
|
||||
'current_snapshot': None,
|
||||
'diff': FormattableDiff('', ''),
|
||||
'diff_clean': FormattableDiff('', '', include_change_type_prefix=False),
|
||||
'diff_added': FormattableDiff('', '', include_removed=False),
|
||||
'diff_added_clean': FormattableDiff('', '', include_removed=False, include_change_type_prefix=False),
|
||||
'diff_full': FormattableDiff('', '', include_equal=True),
|
||||
'diff_full_clean': FormattableDiff('', '', include_equal=True, include_change_type_prefix=False),
|
||||
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff': None,
|
||||
'diff_clean': None,
|
||||
'diff_added': None,
|
||||
'diff_added_clean': None,
|
||||
'diff_full': None,
|
||||
'diff_full_clean': None,
|
||||
'diff_patch': None,
|
||||
'diff_removed': None,
|
||||
'diff_removed_clean': None,
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
'prev_snapshot': None,
|
||||
'preview_url': None,
|
||||
'screenshot': None,
|
||||
'triggered_text': None,
|
||||
'timestamp_from': None,
|
||||
'timestamp_to': None,
|
||||
'triggered_text': None,
|
||||
'llm_summary': None, # AI plain-English summary of what changed (requires AI intent to be configured)
|
||||
'llm_intent': None, # The intent that was evaluated (watch-level or inherited from tag)
|
||||
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
'watch_mime_type': None,
|
||||
'watch_tag': None,
|
||||
'watch_title': None,
|
||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||
'watch_uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
})
|
||||
|
||||
# Apply any initial data passed in
|
||||
@@ -224,7 +103,7 @@ class NotificationContextData(dict):
|
||||
So we can test the output in the notification body
|
||||
"""
|
||||
for key in self.keys():
|
||||
if key in ['uuid', 'time', 'watch_uuid', 'change_datetime'] or key.startswith('diff'):
|
||||
if key in ['uuid', 'time', 'watch_uuid']:
|
||||
continue
|
||||
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
||||
self[key] = rand_str
|
||||
@@ -236,6 +115,24 @@ class NotificationContextData(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def timestamp_to_localtime(timestamp):
|
||||
# Format the date using locale-aware formatting with timezone
|
||||
dt = datetime.datetime.fromtimestamp(int(timestamp))
|
||||
dt = dt.replace(tzinfo=pytz.UTC)
|
||||
|
||||
# Get local timezone-aware datetime
|
||||
local_tz = datetime.datetime.now().astimezone().tzinfo
|
||||
local_dt = dt.astimezone(local_tz)
|
||||
|
||||
# Format date with timezone - using strftime for locale awareness
|
||||
try:
|
||||
formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
|
||||
except:
|
||||
# Fallback if locale issues
|
||||
formatted_date = local_dt.isoformat()
|
||||
|
||||
return formatted_date
|
||||
|
||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
|
||||
"""
|
||||
Efficiently renders only the diff placeholders that are actually used in the notification text.
|
||||
@@ -253,12 +150,13 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
Returns:
|
||||
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
||||
"""
|
||||
from changedetectionio import diff
|
||||
import re
|
||||
from functools import lru_cache
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Define base kwargs for each diff variant — these become the stored defaults
|
||||
# on the FormattableDiff object, so {{ diff(lines=5) }} overrides on top of them
|
||||
# Define specifications for each diff variant
|
||||
diff_specs = {
|
||||
'diff': {'word_diff': word_diff},
|
||||
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
|
||||
@@ -271,27 +169,23 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
from changedetectionio.diff import extract_changed_from, extract_changed_to
|
||||
extract_specs = {
|
||||
'diff_changed_from': extract_changed_from,
|
||||
'diff_changed_to': extract_changed_to,
|
||||
}
|
||||
# Memoize render_diff to avoid duplicate renders with same kwargs
|
||||
@lru_cache(maxsize=4)
|
||||
def cached_render(kwargs_tuple):
|
||||
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
|
||||
# Only check and render diff keys that exist in NotificationContextData
|
||||
for key in NotificationContextData().keys():
|
||||
if not key.startswith('diff'):
|
||||
continue
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
continue
|
||||
if key in diff_specs:
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
elif key in extract_specs:
|
||||
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
|
||||
rendered_count += 1
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
kwargs = diff_specs[key]
|
||||
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
|
||||
ret[key] = cached_render(tuple(sorted(kwargs.items())))
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
|
||||
@@ -304,7 +198,7 @@ def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggere
|
||||
'current_snapshot': current_snapshot,
|
||||
'prev_snapshot': prev_snapshot,
|
||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||
'change_datetime': FormattableTimestamp(timestamp_changed) if timestamp_changed else None,
|
||||
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
|
||||
'triggered_text': triggered_text,
|
||||
'uuid': watch.get('uuid') if watch else None,
|
||||
'watch_url': watch.get('url') if watch else None,
|
||||
@@ -413,11 +307,6 @@ class NotificationService:
|
||||
n_object['notification_body'] = _check_cascading_vars(self.datastore,'notification_body', watch)
|
||||
n_object['notification_format'] = _check_cascading_vars(self.datastore,'notification_format', watch)
|
||||
|
||||
# Attach LLM results so notification tokens render correctly
|
||||
n_object['_llm_result'] = watch.get('_llm_result')
|
||||
n_object['_llm_intent'] = watch.get('_llm_intent', '')
|
||||
n_object['_llm_change_summary'] = watch.get('_llm_change_summary', '')
|
||||
|
||||
# (Individual watch) Only prepare to notify if the rules above matched
|
||||
queued = False
|
||||
if n_object and n_object.get('notification_urls'):
|
||||
@@ -504,7 +393,7 @@ Thanks - Your omniscient changedetection.io installation.
|
||||
n_object = NotificationContextData({
|
||||
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
|
||||
'notification_body': body,
|
||||
'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
|
||||
'notification_format': self._check_cascading_vars('notification_format', watch),
|
||||
})
|
||||
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class ChangeDetectionSpec:
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def get_itemprop_availability_override(self, content, fetcher_name, fetcher_instance, url, llm_intent=None):
|
||||
def get_itemprop_availability_override(self, content, fetcher_name, fetcher_instance, url):
|
||||
"""Provide custom implementation of get_itemprop_availability for a specific fetcher.
|
||||
|
||||
This hook allows plugins to provide their own product availability detection
|
||||
@@ -73,7 +73,6 @@ class ChangeDetectionSpec:
|
||||
fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
|
||||
fetcher_instance: The fetcher instance that generated the content
|
||||
url: The URL being watched/checked
|
||||
llm_intent: Optional user-supplied intent string (e.g. "alert when price drops below $300")
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with availability data:
|
||||
@@ -130,109 +129,6 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def update_handler_alter(update_handler, watch, datastore):
|
||||
"""Modify or wrap the update_handler before it processes a watch.
|
||||
|
||||
This hook is called after the update_handler (perform_site_check instance) is created
|
||||
but before it calls call_browser() and run_changedetection(). Plugins can use this to:
|
||||
- Wrap the handler to add logging/metrics
|
||||
- Modify handler configuration
|
||||
- Add custom preprocessing logic
|
||||
|
||||
Args:
|
||||
update_handler: The perform_site_check instance that will process the watch
|
||||
watch: The watch dict being processed
|
||||
datastore: The application datastore
|
||||
|
||||
Returns:
|
||||
object or None: Return a modified/wrapped handler, or None to keep the original.
|
||||
If multiple plugins return handlers, they are chained in registration order.
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def update_finalize(update_handler, watch, datastore, processing_exception):
|
||||
"""Called after watch processing completes (success or failure).
|
||||
|
||||
This hook is called in the finally block after all processing is complete,
|
||||
allowing plugins to perform cleanup, update metrics, or log final status.
|
||||
|
||||
The plugin can access update_handler.last_logging_insert_id if it was stored
|
||||
during update_handler_alter, and use processing_exception to determine if
|
||||
the processing succeeded or failed.
|
||||
|
||||
Args:
|
||||
update_handler: The perform_site_check instance (may be None if creation failed)
|
||||
watch: The watch dict that was processed (may be None if not loaded)
|
||||
datastore: The application datastore
|
||||
processing_exception: The exception from the main processing block, or None if successful.
|
||||
This does NOT include cleanup exceptions - only exceptions from
|
||||
the actual watch processing (fetch, diff, etc).
|
||||
|
||||
Returns:
|
||||
None: This hook doesn't return a value
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def get_html_head_extras():
|
||||
"""Return HTML to inject into the <head> of every page via base.html.
|
||||
|
||||
Plugins can use this to add <script>, <style>, or <link> tags that should
|
||||
be present on all pages. Return a raw HTML string or None.
|
||||
|
||||
IMPORTANT: Always use Flask's url_for() for any src/href URLs so that
|
||||
sub-path deployments (nginx reverse proxy with USE_X_SETTINGS / X-Forwarded-Prefix)
|
||||
work correctly. This hook is called inside a request context so url_for() is
|
||||
always available.
|
||||
|
||||
For small amounts of CSS/JS, return them inline — no file-serving needed::
|
||||
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
@hookimpl
|
||||
def get_html_head_extras(self):
|
||||
return (
|
||||
'<style>.my-module-banner { color: red; }</style>\\n'
|
||||
'<script>console.log("my_module_content loaded");</script>'
|
||||
)
|
||||
|
||||
For larger assets, register your own lightweight Flask routes in the plugin
|
||||
module and point to them with url_for() so the sub-path prefix is handled
|
||||
automatically::
|
||||
|
||||
from flask import url_for, Response
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.flask_app import app as _app
|
||||
|
||||
MY_CSS = ".my-module-example { color: red; }"
|
||||
MY_JS = "console.log('my_module_content loaded');"
|
||||
|
||||
@_app.route('/my_module_content/css')
|
||||
def my_module_content_css():
|
||||
return Response(MY_CSS, mimetype='text/css',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
@_app.route('/my_module_content/js')
|
||||
def my_module_content_js():
|
||||
return Response(MY_JS, mimetype='application/javascript',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
@hookimpl
|
||||
def get_html_head_extras(self):
|
||||
css = url_for('my_module_content_css')
|
||||
js = url_for('my_module_content_js')
|
||||
return (
|
||||
f'<link rel="stylesheet" href="{css}">\\n'
|
||||
f'<script src="{js}" defer></script>'
|
||||
)
|
||||
|
||||
Returns:
|
||||
str or None: Raw HTML string to inject inside <head>, or None
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
@@ -242,27 +138,24 @@ plugin_manager.add_hookspecs(ChangeDetectionSpec)
|
||||
|
||||
# Load plugins from subdirectories
|
||||
def load_plugins_from_directories():
|
||||
# List of (python_package_prefix, filesystem_path) pairs to scan for plugins.
|
||||
# NOTE: processors/restock_diff/plugins is intentionally excluded here — those
|
||||
# plugins are registered via register_builtin_restock_plugins() to avoid the
|
||||
# circular import: restock_diff/__init__.py → model.Watch → content_fetchers → pluggy_interface.
|
||||
plugin_dirs = [
|
||||
(
|
||||
'changedetectionio.conditions.plugins',
|
||||
os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
|
||||
),
|
||||
]
|
||||
|
||||
for module_prefix, dir_path in plugin_dirs:
|
||||
# Dictionary of directories to scan for plugins
|
||||
plugin_dirs = {
|
||||
'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
|
||||
# Add more plugin directories here as needed
|
||||
}
|
||||
|
||||
# Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory
|
||||
|
||||
for dir_name, dir_path in plugin_dirs.items():
|
||||
if not os.path.exists(dir_path):
|
||||
continue
|
||||
|
||||
|
||||
# Get all Python files (excluding __init__.py)
|
||||
for filename in os.listdir(dir_path):
|
||||
if filename.endswith(".py") and filename != "__init__.py":
|
||||
module_name = filename[:-3] # Remove .py extension
|
||||
module_path = f"{module_prefix}.{module_name}"
|
||||
|
||||
module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"
|
||||
|
||||
try:
|
||||
module = importlib.import_module(module_path)
|
||||
# Register the plugin with pluggy
|
||||
@@ -314,24 +207,6 @@ def register_builtin_fetchers():
|
||||
if hasattr(webdriver_selenium, 'webdriver_selenium_plugin'):
|
||||
plugin_manager.register(webdriver_selenium.webdriver_selenium_plugin, 'builtin_webdriver_selenium')
|
||||
|
||||
|
||||
def register_builtin_restock_plugins():
|
||||
"""Register built-in restock processor plugins after all imports are complete.
|
||||
|
||||
Called from content_fetchers/__init__.py alongside register_builtin_fetchers()
|
||||
to avoid the circular import that occurs when loading via load_plugins_from_directories()
|
||||
(restock_diff/__init__.py → model.Watch → content_fetchers → pluggy_interface).
|
||||
"""
|
||||
import importlib
|
||||
module_path = 'changedetectionio.processors.restock_diff.plugins.llm_restock'
|
||||
try:
|
||||
module = importlib.import_module(module_path)
|
||||
if not plugin_manager.is_registered(module):
|
||||
plugin_manager.register(module, 'llm_restock')
|
||||
logger.debug("Registered built-in restock plugin: llm_restock")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to register llm_restock plugin: {e}")
|
||||
|
||||
# Helper function to collect UI stats extras from all plugins
|
||||
def collect_ui_edit_stats_extras(watch):
|
||||
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
|
||||
@@ -368,7 +243,7 @@ def collect_fetcher_status_icons(fetcher_name):
|
||||
|
||||
return None
|
||||
|
||||
def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instance, url, llm_intent=None):
|
||||
def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instance, url):
|
||||
"""Get itemprop availability data from plugins as a fallback.
|
||||
|
||||
This is called when the built-in get_itemprop_availability doesn't find good data.
|
||||
@@ -378,7 +253,6 @@ def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instanc
|
||||
fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
|
||||
fetcher_instance: The fetcher instance that generated the content
|
||||
url: The URL being watched (watch.link - includes Jinja2 evaluation)
|
||||
llm_intent: Optional user-supplied intent string passed through to plugins
|
||||
|
||||
Returns:
|
||||
dict or None: Availability data dictionary from first matching plugin, or None
|
||||
@@ -388,8 +262,7 @@ def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instanc
|
||||
content=content,
|
||||
fetcher_name=fetcher_name,
|
||||
fetcher_instance=fetcher_instance,
|
||||
url=url,
|
||||
llm_intent=llm_intent,
|
||||
url=url
|
||||
)
|
||||
|
||||
# Return first non-None result with actual data
|
||||
@@ -626,82 +499,4 @@ def get_plugin_template_paths():
|
||||
template_paths.append(templates_dir)
|
||||
logger.debug(f"Added plugin template path: {templates_dir}")
|
||||
|
||||
return template_paths
|
||||
|
||||
|
||||
def apply_update_handler_alter(update_handler, watch, datastore):
|
||||
"""Apply update_handler_alter hooks from all plugins.
|
||||
|
||||
Allows plugins to wrap or modify the update_handler before it processes a watch.
|
||||
Multiple plugins can chain modifications - each plugin receives the result from
|
||||
the previous plugin.
|
||||
|
||||
Args:
|
||||
update_handler: The perform_site_check instance to potentially modify
|
||||
watch: The watch dict being processed
|
||||
datastore: The application datastore
|
||||
|
||||
Returns:
|
||||
object: The (potentially modified/wrapped) update_handler
|
||||
"""
|
||||
# Get all plugins that implement the update_handler_alter hook
|
||||
results = plugin_manager.hook.update_handler_alter(
|
||||
update_handler=update_handler,
|
||||
watch=watch,
|
||||
datastore=datastore
|
||||
)
|
||||
|
||||
# Chain results - each plugin gets the result from the previous one
|
||||
current_handler = update_handler
|
||||
if results:
|
||||
for result in results:
|
||||
if result is not None:
|
||||
logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
|
||||
current_handler = result
|
||||
|
||||
return current_handler
|
||||
|
||||
|
||||
def apply_update_finalize(update_handler, watch, datastore, processing_exception):
|
||||
"""Apply update_finalize hooks from all plugins.
|
||||
|
||||
Called in the finally block after watch processing completes, allowing plugins
|
||||
to perform cleanup, update metrics, or log final status.
|
||||
|
||||
Args:
|
||||
update_handler: The perform_site_check instance (may be None)
|
||||
watch: The watch dict that was processed (may be None)
|
||||
datastore: The application datastore
|
||||
processing_exception: The exception from processing, or None if successful
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
try:
|
||||
# Call all plugins that implement the update_finalize hook
|
||||
plugin_manager.hook.update_finalize(
|
||||
update_handler=update_handler,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
processing_exception=processing_exception
|
||||
)
|
||||
except Exception as e:
|
||||
# Don't let plugin errors crash the worker
|
||||
logger.error(f"Error in update_finalize hook: {e}")
|
||||
logger.exception(f"update_finalize hook exception details:")
|
||||
|
||||
|
||||
def collect_html_head_extras():
|
||||
"""Collect and combine HTML head extras from all plugins.
|
||||
|
||||
Called from a Flask template global so it always runs inside a request context.
|
||||
This means url_for() works correctly in plugin implementations, including when the
|
||||
app is deployed under a sub-path via USE_X_SETTINGS / X-Forwarded-Prefix (ProxyFix
|
||||
sets SCRIPT_NAME so url_for() automatically prepends the prefix).
|
||||
|
||||
Returns:
|
||||
str: Combined HTML string to inject inside <head>, or empty string
|
||||
"""
|
||||
results = plugin_manager.hook.get_html_head_extras()
|
||||
parts = [r for r in results if r]
|
||||
return "\n".join(parts) if parts else ""
|
||||
return template_paths
|
||||
@@ -9,15 +9,6 @@ Some suggestions for the future
|
||||
|
||||
- `graphical`
|
||||
|
||||
## API schema extension (`api.yaml`)
|
||||
|
||||
A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
|
||||
Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
|
||||
making `processor_config_<name>` a valid field on all watch create/update API calls.
|
||||
The fully merged spec is served live at `/api/v1/full-spec`.
|
||||
|
||||
See `restock_diff/api.yaml` for a working example.
|
||||
|
||||
## Todo
|
||||
|
||||
- Make each processor return a extra list of sub-processed (so you could configure a single processor in different ways)
|
||||
|
||||
@@ -341,18 +341,6 @@ def get_processor_descriptions():
|
||||
return descriptions
|
||||
|
||||
|
||||
def wcag_text_color(hex_bg: str) -> str:
|
||||
"""Return #000000 or #ffffff for maximum WCAG contrast against hex_bg."""
|
||||
hex_bg = hex_bg.lstrip('#')
|
||||
if len(hex_bg) != 6:
|
||||
return '#000000'
|
||||
r, g, b = (int(hex_bg[i:i+2], 16) / 255 for i in (0, 2, 4))
|
||||
def lin(c):
|
||||
return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4
|
||||
L = 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b)
|
||||
return '#000000' if L > 0.179 else '#ffffff'
|
||||
|
||||
|
||||
def generate_processor_badge_colors(processor_name):
|
||||
"""
|
||||
Generate consistent colors for a processor badge based on its name.
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
import asyncio
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from changedetectionio.browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
from copy import deepcopy
|
||||
from abc import abstractmethod
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
||||
@@ -97,22 +94,6 @@ class difference_detection_processor():
|
||||
logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
|
||||
self.last_raw_content_checksum = None
|
||||
|
||||
async def validate_iana_url(self):
|
||||
"""Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop.
|
||||
Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes
|
||||
through call_browser().
|
||||
"""
|
||||
if strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')):
|
||||
return
|
||||
parsed = urlparse(self.watch.link)
|
||||
if not parsed.hostname:
|
||||
return
|
||||
loop = asyncio.get_running_loop()
|
||||
if await loop.run_in_executor(None, is_private_hostname, parsed.hostname):
|
||||
raise Exception(
|
||||
f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. "
|
||||
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
|
||||
)
|
||||
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
@@ -127,8 +108,6 @@ class difference_detection_processor():
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
await self.validate_iana_url()
|
||||
|
||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
||||
|
||||
@@ -259,16 +238,6 @@ class difference_detection_processor():
|
||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||
await self.fetcher.quit(watch=self.watch)
|
||||
|
||||
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
|
||||
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
|
||||
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
|
||||
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
|
||||
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
|
||||
if self.fetcher.content and isinstance(self.fetcher.content, str):
|
||||
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
def get_extra_watch_config(self, filename):
|
||||
|
||||
@@ -42,7 +42,10 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
||||
# Get error information for the template
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
@@ -61,7 +64,7 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
||||
screenshot=screenshot_url,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
||||
extra_title=f" - {watch.label} - {gettext('Extract Data')}",
|
||||
extra_title=f" - {watch.label} - Extract Data",
|
||||
extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
|
||||
pure_menu_fixed=False
|
||||
)
|
||||
|
||||
@@ -100,13 +100,7 @@ class guess_stream_type():
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
||||
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
||||
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
||||
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
||||
self.is_plaintext = True
|
||||
else:
|
||||
self.is_json = True
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
# magic will call a rss document 'xml'
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
|
||||
from babel.numbers import parse_decimal
|
||||
from changedetectionio.model.Watch import model as BaseWatch
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
@@ -11,8 +10,6 @@ supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
|
||||
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
@@ -34,7 +31,6 @@ class Restock(dict):
|
||||
|
||||
if standardized_value:
|
||||
# Convert to float
|
||||
# @todo locale needs to be the locale of the webpage
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
return None
|
||||
@@ -66,22 +62,15 @@ class Restock(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def get_price_from_history_str(history_str):
|
||||
m = _price_re.search(history_str)
|
||||
if not m:
|
||||
return None
|
||||
|
||||
try:
|
||||
return str(Decimal(m.group(1)))
|
||||
except InvalidOperation:
|
||||
return None
|
||||
|
||||
|
||||
class Watch(BaseWatch):
|
||||
def __init__(self, *arg, **kw):
|
||||
super().__init__(*arg, **kw)
|
||||
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
|
||||
|
||||
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing' : 'in_stock_only'
|
||||
} #@todo update
|
||||
|
||||
def clear_watch(self):
|
||||
super().clear_watch()
|
||||
@@ -90,27 +79,13 @@ class Watch(BaseWatch):
|
||||
def extra_notification_token_values(self):
|
||||
values = super().extra_notification_token_values()
|
||||
values['restock'] = self.get('restock', {})
|
||||
|
||||
values['restock']['previous_price'] = None
|
||||
if self.history_n >= 2:
|
||||
history = self.history
|
||||
if history and len(history) >=2:
|
||||
"""Unfortunately for now timestamp is stored as string key"""
|
||||
sorted_keys = sorted(list(history), key=lambda x: int(x))
|
||||
sorted_keys.reverse()
|
||||
|
||||
price_str = self.get_history_snapshot(timestamp=sorted_keys[-1])
|
||||
if price_str:
|
||||
values['restock']['previous_price'] = get_price_from_history_str(price_str)
|
||||
return values
|
||||
|
||||
def extra_notification_token_placeholder_info(self):
|
||||
values = super().extra_notification_token_placeholder_info()
|
||||
|
||||
values.append(('restock.price', "Price detected"))
|
||||
values.append(('restock.in_stock', "In stock status"))
|
||||
values.append(('restock.original_price', "Original price at first check"))
|
||||
values.append(('restock.previous_price', "Previous price in history"))
|
||||
|
||||
return values
|
||||
|
||||
|
||||
@@ -1,149 +0,0 @@
|
||||
components:
|
||||
schemas:
|
||||
processor_config_restock_diff:
|
||||
type: object
|
||||
description: Configuration for the restock_diff processor (restock and price tracking)
|
||||
properties:
|
||||
in_stock_processing:
|
||||
type: string
|
||||
enum: [in_stock_only, all_changes, 'off']
|
||||
default: in_stock_only
|
||||
description: |
|
||||
When to trigger on stock changes:
|
||||
- `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
|
||||
- `all_changes`: Trigger on any availability change
|
||||
- `off`: Disable stock/availability tracking
|
||||
follow_price_changes:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Monitor and track price changes
|
||||
price_change_min:
|
||||
type: [number, 'null']
|
||||
description: Trigger a notification when the price drops below this value
|
||||
price_change_max:
|
||||
type: [number, 'null']
|
||||
description: Trigger a notification when the price rises above this value
|
||||
price_change_threshold_percent:
|
||||
type: [number, 'null']
|
||||
minimum: 0
|
||||
maximum: 100
|
||||
description: Minimum price change percentage since the original price to trigger a notification
|
||||
|
||||
paths:
|
||||
/watch:
|
||||
post:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Restock & price tracking'
|
||||
source: |
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/product",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Restock & price tracking'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
data = {
|
||||
'url': 'https://example.com/product',
|
||||
'processor': 'restock_diff',
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
'follow_price_changes': True,
|
||||
'price_change_threshold_percent': 5,
|
||||
}
|
||||
}
|
||||
response = requests.post('http://localhost:5000/api/v1/watch',
|
||||
headers=headers, json=data)
|
||||
print(response.json())
|
||||
|
||||
/watch/{uuid}:
|
||||
put:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Update restock config'
|
||||
source: |
|
||||
curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": true,
|
||||
"price_change_min": 10.00,
|
||||
"price_change_max": 500.00
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Update restock config'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
uuid = 'YOUR-UUID'
|
||||
data = {
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'all_changes',
|
||||
'follow_price_changes': True,
|
||||
'price_change_min': 10.00,
|
||||
'price_change_max': 500.00,
|
||||
}
|
||||
}
|
||||
response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
|
||||
headers=headers, json=data)
|
||||
print(response.text)
|
||||
|
||||
/tag/{uuid}:
|
||||
put:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Set restock config on group/tag'
|
||||
source: |
|
||||
curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"overrides_watch": true,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 10
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Set restock config on group/tag'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
tag_uuid = 'YOUR-TAG-UUID'
|
||||
data = {
|
||||
'overrides_watch': True,
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
'follow_price_changes': True,
|
||||
'price_change_threshold_percent': 10,
|
||||
}
|
||||
}
|
||||
response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
|
||||
headers=headers, json=data)
|
||||
print(response.text)
|
||||
@@ -31,7 +31,7 @@ class RestockSettingsForm(Form):
|
||||
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
|
||||
processor_config_restock_diff = FormField(RestockSettingsForm)
|
||||
restock_settings = FormField(RestockSettingsForm)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return _l('Restock & Price Detection')
|
||||
@@ -48,34 +48,34 @@ class processor_settings_form(processor_text_json_diff_form):
|
||||
|
||||
output += """
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
<script>
|
||||
<script>
|
||||
$(document).ready(function () {
|
||||
toggleOpacity('#processor_config_restock_diff-follow_price_changes', '.price-change-minmax', true);
|
||||
toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true);
|
||||
});
|
||||
</script>
|
||||
|
||||
<fieldset id="restock-fieldset-price-group">
|
||||
<div class="pure-control-group">
|
||||
<fieldset class="pure-group inline-radio">
|
||||
{{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
|
||||
{{ render_field(form.restock_settings.in_stock_processing) }}
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }}
|
||||
{{ render_checkbox_field(form.restock_settings.follow_price_changes) }}
|
||||
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
{{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
|
||||
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
|
||||
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
|
||||
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
|
||||
</fieldset>
|
||||
</fieldset>
|
||||
</div>
|
||||
</fieldset>
|
||||
"""
|
||||
|
||||
@@ -1,295 +0,0 @@
|
||||
"""
|
||||
LLM fallback plugin for price and restock info extraction.
|
||||
|
||||
When the built-in structured-metadata extraction (JSON-LD, microdata, OpenGraph)
|
||||
fails to produce both a price and availability, this plugin is called as a last
|
||||
resort. It sends a trimmed, HTML-stripped version of the page to the configured
|
||||
LLM and asks it to return a structured JSON answer.
|
||||
|
||||
The module-level `datastore` variable is injected at startup by
|
||||
`inject_datastore_into_plugins()` in pluggy_interface.py.
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
from loguru import logger
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
# Injected at startup by inject_datastore_into_plugins()
|
||||
datastore = None
|
||||
|
||||
SYSTEM_PROMPT = (
|
||||
'You are an expert price and restock extraction utility. '
|
||||
'Your task is to analyse a product page and determine the price and stock status of the MAIN product only.\n\n'
|
||||
|
||||
'AVAILABILITY — treat as "in stock":\n'
|
||||
'- Action buttons near the product: "Add to cart", "Add to basket", "Buy now", '
|
||||
'"Order now", "Purchase", "Import", "Add to bag", "Add to trolley", "In stock", '
|
||||
'"Available", "Ships in X days/weeks", "In store", "Pick up today".\n'
|
||||
'- "Pre-order" or "Reserve" — the item is orderable, treat as "in stock".\n'
|
||||
'- "Only X left", "Almost gone", "Low stock", "Limited availability" — still in stock.\n'
|
||||
'- "Request a quote" or "Contact us for pricing" — item is available, price is null.\n'
|
||||
'- IMPORTANT: Ignore cart/basket/bag links in the page HEADER or navigation bar '
|
||||
'(e.g. a shopping cart icon showing item count). That reflects what is already in '
|
||||
'the visitor\'s cart — it says nothing about whether THIS product is available.\n\n'
|
||||
|
||||
'PRICE — what NOT to use:\n'
|
||||
'- A "$0.00" or "0" that appears near header/nav links such as "Login", "Wishlist", '
|
||||
'"Contact Us", "My Account" is an empty shopping-cart indicator, NOT the product price. '
|
||||
'Ignore it entirely — return null for price rather than 0 in this situation.\n'
|
||||
'- Only return 0 (free) when the page clearly states the product itself costs nothing '
|
||||
'(e.g. "Free", "Free download", "Price: $0").\n\n'
|
||||
|
||||
'AVAILABILITY — treat as "out of stock":\n'
|
||||
'- "Out of stock", "Sold out", "Unavailable", "Currently unavailable", '
|
||||
'"Temporarily out of stock", "Discontinued", "No longer available", '
|
||||
'"Notify me when available", "Email me when back", "Join waitlist".\n\n'
|
||||
|
||||
'AVAILABILITY — return null when uncertain:\n'
|
||||
'- The page asks the user to select a size, colour, or other variant first '
|
||||
'("Select an option", "Choose a size") — availability depends on the variant, so return null.\n'
|
||||
'- You cannot clearly tell from the page content whether the item is available.\n\n'
|
||||
|
||||
'PRICE rules:\n'
|
||||
'- Extract the main selling price as a plain number, no currency symbol.\n'
|
||||
'- Prices may use any popular locale format — interpret them all correctly and return a plain decimal number. '
|
||||
'Examples: "10 000 Kč" = 10000, "1.299,95 €" = 1299.95, "1,299.95" = 1299.95, '
|
||||
'"10 000,50" = 10000.50, "£1.299" = 1299, "¥10000" = 10000.\n'
|
||||
'- If both an original (crossed-out) price and a sale/current price appear, use the sale price.\n'
|
||||
'- "From $X" or "Starting at $X" are teaser prices — prefer a definite price or return null.\n'
|
||||
'- A price of 0 (free) is valid — return 0, not null.\n'
|
||||
'- If pricing requires a quote or login, return null for price.\n'
|
||||
'- Ignore prices shown in search/filter UI elements (e.g. "Price from: — to:").\n'
|
||||
'- IMPORTANT: Ignore ALL prices that appear inside or below recommendation/discovery blocks '
|
||||
'such as: "Similar items", "You may also like", "Customers also bought", '
|
||||
'"Based on your browsing", "Based on your shopping", "Frequently bought together", '
|
||||
'"People also viewed", "Related products", "Sponsored products", "More like this", '
|
||||
'"Other sellers", "Compare with similar items". '
|
||||
'These sections contain prices for OTHER products, not the main product.\n'
|
||||
'- When multiple prices appear on the page, prefer the price that is positioned '
|
||||
'earliest/highest in the page content — it is almost always the main product price. '
|
||||
'Prices appearing after large blocks of descriptive text or review sections are '
|
||||
'likely from recommendation widgets and should be ignored.\n\n'
|
||||
|
||||
'CLASSIFIEDS AND LISTING PAGES:\n'
|
||||
'- On classifieds or marketplace sites (e.g. eBay listings, Craigslist, Bazoš, Gumtree), '
|
||||
'if a price is shown alongside seller contact details or a "Contact seller" link, '
|
||||
'treat the item as "instock" — the listing being active means it is available.\n\n'
|
||||
|
||||
'Return ONLY a JSON object with exactly these three keys:\n'
|
||||
' "price" — number or null\n'
|
||||
' "currency" — ISO-4217 code (USD, EUR, GBP …) or null\n'
|
||||
' "availability" — exactly one of: "instock", "outofstock", or null\n'
|
||||
' Use "instock" when the product can be ordered/purchased.\n'
|
||||
' Use "outofstock" when it cannot.\n'
|
||||
' Use null when you genuinely cannot tell.\n'
|
||||
'No markdown, no backticks, no explanation — pure JSON only.'
|
||||
)
|
||||
|
||||
_MAX_CONTENT_CHARS = 8_000
|
||||
|
||||
|
||||
def _extract_jsonld(html_content: str) -> str:
|
||||
"""Extract JSON-LD blocks — these contain reliable structured product data."""
|
||||
blocks = re.findall(
|
||||
r'<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>',
|
||||
html_content, flags=re.DOTALL | re.IGNORECASE
|
||||
)
|
||||
if not blocks:
|
||||
return ''
|
||||
combined = ' '.join(b.strip() for b in blocks)
|
||||
return combined[:2000]
|
||||
|
||||
|
||||
# Semantic tags always treated as chrome (nav/header/footer)
|
||||
_CHROME_TAGS = {'nav', 'header', 'footer', 'aside'}
|
||||
|
||||
# id/class fragments that strongly indicate navigation or site-chrome
|
||||
_CHROME_PATTERNS = re.compile(
|
||||
r'\b(nav|navigation|navbar|menu|mega-menu|breadcrumb|breadcrumbs?|'
|
||||
r'site-header|page-header|top-bar|top-nav|top-header|mobile-nav|header-bar|'
|
||||
r'site-footer|page-footer|footer-links|related|similar|'
|
||||
r'you-?may-?also|customers?-?also|frequently-?bought|'
|
||||
r'people-?also|sponsored|recommendation|widget|sidebar|'
|
||||
r'cross-?sell|up-?sell)\b',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _remove_chrome(html_content: str) -> str:
|
||||
"""Use BS4 to strip navigation, header, footer and recommendation noise.
|
||||
|
||||
Uses html.parser (built-in, no lxml) to avoid memory leak issues.
|
||||
Falls back to the original HTML string if BS4 fails for any reason.
|
||||
"""
|
||||
try:
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
|
||||
# Snapshot the full tag list before any decompositions so we don't
|
||||
# mutate the tree while iterating it. After a parent is decomposed
|
||||
# its children become orphans (parent=None) — skip those.
|
||||
for tag in list(soup.find_all(True)):
|
||||
if not isinstance(tag, Tag) or tag.parent is None:
|
||||
continue
|
||||
name = tag.name or ''
|
||||
if name in _CHROME_TAGS:
|
||||
tag.decompose()
|
||||
continue
|
||||
try:
|
||||
cls_list = tag.get('class') or []
|
||||
cls_str = ' '.join(cls_list) if isinstance(cls_list, list) else str(cls_list)
|
||||
id_str = tag.get('id') or ''
|
||||
except Exception:
|
||||
continue
|
||||
if _CHROME_PATTERNS.search(cls_str + ' ' + id_str):
|
||||
tag.decompose()
|
||||
|
||||
return str(soup)
|
||||
except Exception as e:
|
||||
logger.debug(f"BS4 chrome removal failed ({e}), using raw HTML")
|
||||
return html_content
|
||||
|
||||
|
||||
def _strip_html(html_content: str) -> str:
|
||||
"""HTML-to-text for LLM consumption.
|
||||
|
||||
1. Extracts JSON-LD (structured product data) to prepend.
|
||||
2. Strips nav/header/footer/recommendation blocks via BS4.
|
||||
3. Removes all remaining tags and collapses whitespace.
|
||||
JSON-LD is prepended so reliable price/availability data is always visible
|
||||
to the LLM regardless of how deep it sits in the page.
|
||||
"""
|
||||
jsonld = _extract_jsonld(html_content)
|
||||
|
||||
# Remove site-chrome before generic tag stripping
|
||||
cleaned = _remove_chrome(html_content)
|
||||
|
||||
# Drop HTML comments (can contain large disabled markup blocks)
|
||||
text = re.sub(r'<!--.*?-->', ' ', cleaned, flags=re.DOTALL)
|
||||
# Drop all <script> and <style> blocks
|
||||
text = re.sub(r'<(script|style)[^>]*>.*?</(script|style)>', ' ', text, flags=re.DOTALL | re.IGNORECASE)
|
||||
# Strip remaining tags
|
||||
text = re.sub(r'<[^>]+>', ' ', text)
|
||||
# Decode common entities
|
||||
text = (text
|
||||
.replace(' ', ' ')
|
||||
.replace('&', '&')
|
||||
.replace('<', '<')
|
||||
.replace('>', '>')
|
||||
.replace('"', '"')
|
||||
.replace(''', "'"))
|
||||
text = re.sub(r'\s+', ' ', text).strip()
|
||||
|
||||
if jsonld:
|
||||
budget = _MAX_CONTENT_CHARS - len(jsonld) - 1
|
||||
return (jsonld + ' ' + text[:budget]).strip()
|
||||
return text[:_MAX_CONTENT_CHARS]
|
||||
|
||||
|
||||
@hookimpl
|
||||
def get_itemprop_availability_override(content, fetcher_name, fetcher_instance, url, llm_intent=None):
|
||||
"""Use an LLM as a last-resort fallback for price and restock extraction."""
|
||||
global datastore
|
||||
|
||||
if datastore is None:
|
||||
logger.debug("LLM restock fallback: no datastore injected yet, skipping")
|
||||
return None
|
||||
|
||||
# Gate on the user setting (default True — enabled out of the box)
|
||||
app_settings = datastore.data.get('settings', {}).get('application', {})
|
||||
if not app_settings.get('llm_restock_use_fallback_extract', True):
|
||||
logger.debug("LLM restock fallback: disabled in settings")
|
||||
return None
|
||||
|
||||
try:
|
||||
from changedetectionio.llm.evaluator import get_llm_config, accumulate_global_tokens
|
||||
from changedetectionio.llm import client as llm_client
|
||||
except ImportError as e:
|
||||
logger.debug(f"LLM restock fallback: LLM libraries not available ({e})")
|
||||
return None
|
||||
|
||||
llm_cfg = get_llm_config(datastore)
|
||||
if not llm_cfg or not llm_cfg.get('model'):
|
||||
logger.debug("LLM restock fallback: no LLM model configured, skipping")
|
||||
return None
|
||||
|
||||
text_content = _strip_html(content) if content else ''
|
||||
logger.debug(f"LLM restock fallback: stripped HTML to {len(text_content)} chars for {url}")
|
||||
if not text_content.strip():
|
||||
logger.debug("LLM restock fallback: no text content after stripping HTML")
|
||||
return None
|
||||
|
||||
logger.info(f"LLM restock fallback: using LLM ({llm_cfg['model']}) for price/stock extraction - {url}")
|
||||
|
||||
user_prompt = f'URL: {url or "unknown"}\n\nPage content:\n{text_content}'
|
||||
if llm_intent:
|
||||
user_prompt += f'\n\nUser notification intent: {llm_intent}'
|
||||
|
||||
try:
|
||||
raw, tokens, input_tokens, output_tokens = llm_client.completion(
|
||||
model=llm_cfg['model'],
|
||||
messages=[
|
||||
{'role': 'system', 'content': SYSTEM_PROMPT},
|
||||
{'role': 'user', 'content': user_prompt},
|
||||
],
|
||||
api_key=llm_cfg.get('api_key'),
|
||||
api_base=llm_cfg.get('api_base'),
|
||||
max_tokens=80,
|
||||
)
|
||||
|
||||
accumulate_global_tokens(
|
||||
datastore, tokens,
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
model=llm_cfg['model'],
|
||||
)
|
||||
|
||||
# Strip optional markdown fences the model might add
|
||||
raw = raw.strip()
|
||||
if raw.startswith('```'):
|
||||
raw = re.sub(r'^```[a-z]*\n?', '', raw)
|
||||
raw = raw.rstrip('`').strip()
|
||||
|
||||
logger.debug(f"LLM restock fallback raw response: {raw!r}")
|
||||
|
||||
result = json.loads(raw)
|
||||
|
||||
price = result.get('price')
|
||||
currency = result.get('currency') or None
|
||||
availability = result.get('availability') or None
|
||||
|
||||
# Normalise price to float
|
||||
if price is not None:
|
||||
try:
|
||||
if isinstance(price, str):
|
||||
price = float(re.sub(r'[^\d.]', '', price))
|
||||
else:
|
||||
price = float(price)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"LLM restock fallback: could not convert price {price!r} to float, ignoring")
|
||||
price = None
|
||||
|
||||
if price is None and not availability:
|
||||
logger.info(f"LLM restock fallback: LLM returned no usable price or availability for {url} (raw: {raw!r})")
|
||||
return None
|
||||
|
||||
logger.info(
|
||||
f"LLM restock fallback result: price={price} currency={currency} "
|
||||
f"availability={availability!r} url={url}"
|
||||
)
|
||||
return {
|
||||
'price': price,
|
||||
'currency': currency,
|
||||
'availability': availability,
|
||||
'_tokens': tokens,
|
||||
'_input_tokens': input_tokens,
|
||||
'_output_tokens': output_tokens,
|
||||
'_model': llm_cfg['model'],
|
||||
}
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"LLM restock fallback: JSON parse failed ({e}) - raw response was: {raw!r}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"LLM restock fallback: extraction failed for {url}: {e}")
|
||||
return None
|
||||
@@ -437,32 +437,26 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||
#useless
|
||||
# from ...html_tools import html_to_text
|
||||
# text = html_to_text(self.fetcher.content)
|
||||
# logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
# if not len(text):
|
||||
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
# raise ReplyWithContentButNoText(url=watch.link,
|
||||
# status_code=self.fetcher.get_last_status_code(),
|
||||
# screenshot=self.fetcher.screenshot,
|
||||
# html_content=self.fetcher.content,
|
||||
# xpath_data=self.fetcher.xpath_data
|
||||
# )
|
||||
from ...html_tools import html_to_text
|
||||
text = html_to_text(self.fetcher.content)
|
||||
logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
if not len(text):
|
||||
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
raise ReplyWithContentButNoText(url=watch.link,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
html_content=self.fetcher.content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# Which restock settings to compare against?
|
||||
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
|
||||
_extra_config = self.get_extra_watch_config('restock_diff.json')
|
||||
restock_settings = _extra_config.get('restock_diff') or {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
}
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
|
||||
# See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('processor_config_restock_diff') or {}
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
|
||||
break
|
||||
|
||||
@@ -486,7 +480,8 @@ class perform_site_check(difference_detection_processor):
|
||||
has_price = itemprop_availability.get('price') is not None
|
||||
has_availability = itemprop_availability.get('availability') is not None
|
||||
|
||||
if not (has_price and has_availability):
|
||||
# @TODO !!! some setting like "Use as fallback" or "always use", "t
|
||||
if not (has_price and has_availability) or True:
|
||||
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
|
||||
fetcher_name = watch.get('fetch_backend', 'html_requests')
|
||||
|
||||
@@ -505,23 +500,9 @@ class perform_site_check(difference_detection_processor):
|
||||
# Try plugin override - plugins can decide if they support this fetcher
|
||||
if fetcher_name:
|
||||
logger.debug(f"Calling extra plugins for getting item price/availability (fetcher: {fetcher_name})")
|
||||
from changedetectionio.llm.evaluator import resolve_intent
|
||||
_llm_intent, _ = resolve_intent(watch, self.datastore)
|
||||
plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link, llm_intent=_llm_intent or None)
|
||||
plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)
|
||||
|
||||
if plugin_availability:
|
||||
# Extract and strip LLM token metadata before using as Restock data
|
||||
_plugin_tokens = plugin_availability.pop('_tokens', 0)
|
||||
_plugin_input_tokens = plugin_availability.pop('_input_tokens', 0)
|
||||
_plugin_output_tokens = plugin_availability.pop('_output_tokens', 0)
|
||||
_plugin_model = plugin_availability.pop('_model', '')
|
||||
|
||||
# Update per-watch token counters directly on the watch (same
|
||||
# pattern as evaluator.py) so they're committed when update_watch runs
|
||||
if _plugin_tokens:
|
||||
watch['llm_last_tokens_used'] = _plugin_tokens
|
||||
watch['llm_tokens_used_cumulative'] = (watch.get('llm_tokens_used_cumulative') or 0) + _plugin_tokens
|
||||
|
||||
# Plugin provided better data, use it
|
||||
plugin_has_price = plugin_availability.get('price') is not None
|
||||
plugin_has_availability = plugin_availability.get('availability') is not None
|
||||
|
||||
@@ -283,7 +283,4 @@ def query_price_availability(extracted_data):
|
||||
if not result.get('availability') and 'availability' in microdata:
|
||||
result['availability'] = microdata['availability']
|
||||
|
||||
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
|
||||
# using something like babel you need to know the locale of the website and even then it can be problematic
|
||||
# we dont really do anything with the price data so far.. so just accept it the way it comes.
|
||||
return result
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
{% block content %}
|
||||
<div class="tabs">
|
||||
<ul>
|
||||
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">{{ _('Error Text') }}</a></li> {% endif %}
|
||||
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">{{ _('Error Screenshot') }}</a></li> {% endif %}
|
||||
<li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">{{ _('Text') }}</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">{{ _('Screenshot') }}</a></li>
|
||||
<li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">{{ _('Extract Data') }}</a></li>
|
||||
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">Error Text</a></li> {% endif %}
|
||||
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">Error Screenshot</a></li> {% endif %}
|
||||
<li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">Text</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">Screenshot</a></li>
|
||||
<li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">Extract Data</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -17,23 +17,23 @@
|
||||
<form id="extract-data-form" class="pure-form pure-form-stacked edit-form" action="{{ url_for('ui.ui_diff.diff_history_page_extract_POST', uuid=uuid) }}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
|
||||
<p>{{ _('This tool will extract text data from all of the watch history.') }}</p>
|
||||
<p>This tool will extract text data from all of the watch history.</p>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(extract_form.extract_regex) }}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.')|safe }}<br>
|
||||
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>
|
||||
|
||||
<p>
|
||||
{{ _('For example, to extract only the numbers from text') }} ‐<br>
|
||||
<strong>{{ _('Raw text') }}</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
|
||||
<strong>{{ _('RegEx to extract:') }}</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
|
||||
For example, to extract only the numbers from text ‐<br>
|
||||
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
|
||||
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
|
||||
</p>
|
||||
<p>
|
||||
<a href="https://RegExr.com/">{{ _('Be sure to test your RegEx here.') }}</a>
|
||||
<a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
|
||||
</p>
|
||||
<p>
|
||||
{{ _('Each RegEx group bracket') }} <code>()</code> {{ _('will be in its own column, the first column value is always the date.') }}
|
||||
Each RegEx group bracket <code>()</code> will be in its own column, the first column value is always the date.
|
||||
</p>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -36,7 +36,7 @@ def _task(watch, update_handler):
|
||||
|
||||
|
||||
def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
'''Used by @app.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])'''
|
||||
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
|
||||
from changedetectionio import forms, html_tools
|
||||
from changedetectionio.model.Watch import model as watch_model
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
@@ -65,12 +65,6 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
# Only update vars that came in via the AJAX post
|
||||
p = {k: v for k, v in form.data.items() if k in form_data.keys()}
|
||||
tmp_watch.update(p)
|
||||
|
||||
# Apply llm_intent from form directly — it's not part of processor_text_json_diff_form
|
||||
# but the AJAX sends all visible inputs, so it arrives in form_data
|
||||
if hasattr(form_data, 'get') and 'llm_intent' in form_data:
|
||||
tmp_watch['llm_intent'] = (form_data.get('llm_intent') or '').strip()
|
||||
|
||||
blank_watch_no_filters = watch_model(datastore_path=datastore.datastore_path, __datastore=datastore.data)
|
||||
blank_watch_no_filters['url'] = tmp_watch.get('url')
|
||||
|
||||
@@ -126,18 +120,6 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
except Exception as e:
|
||||
text_before_filter = f"Error: {str(e)}"
|
||||
|
||||
# LLM preview extraction — asks the LLM to directly answer the intent
|
||||
# against the current filtered content (no diff comparison).
|
||||
# e.g. intent "how many articles?" → answer "30 articles listed"
|
||||
# Results are NOT cached back to the real watch.
|
||||
llm_evaluation = None
|
||||
try:
|
||||
from changedetectionio.llm.evaluator import preview_extract
|
||||
if text_after_filter and text_after_filter.strip() not in ('', 'Empty content'):
|
||||
llm_evaluation = preview_extract(tmp_watch, datastore, content=text_after_filter)
|
||||
except Exception as e:
|
||||
logger.warning(f"LLM preview evaluation failed for {watch_uuid}: {e}")
|
||||
|
||||
logger.trace(f"Parsed in {time.time() - now:.3f}s")
|
||||
|
||||
return ({
|
||||
@@ -146,7 +128,6 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
'blocked_line_numbers': blocked_line_numbers,
|
||||
'duration': time.time() - now,
|
||||
'ignore_line_numbers': ignore_line_numbers,
|
||||
'llm_evaluation': llm_evaluation,
|
||||
'trigger_line_numbers': trigger_line_numbers,
|
||||
})
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ a side-by-side or unified diff view with syntax highlighting and change markers.
|
||||
|
||||
import os
|
||||
import time
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio import diff, strtobool
|
||||
@@ -98,7 +97,6 @@ DIFF_PREFERENCES_CONFIG = {
|
||||
'added': {'default': True, 'type': 'bool'},
|
||||
'replaced': {'default': True, 'type': 'bool'},
|
||||
'type': {'default': 'diffLines', 'type': 'value'},
|
||||
'llm_all_changes': {'default': False, 'type': 'bool'},
|
||||
}
|
||||
|
||||
def render(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
|
||||
@@ -156,7 +154,11 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
||||
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
@@ -200,23 +202,6 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
||||
if str(from_version) != str(dates[-2]) or str(to_version) != str(dates[-1]):
|
||||
note = 'Note: You are not viewing the latest changes.'
|
||||
|
||||
llm_configured = bool(
|
||||
datastore.data.get('settings', {}).get('application', {}).get('llm', {}).get('model')
|
||||
)
|
||||
|
||||
# Load cached AI diff summary for this exact from→to + prompt combination
|
||||
viewing_latest = str(to_version) == str(dates[-1])
|
||||
llm_diff_summary = ''
|
||||
llm_summary_prompt = ''
|
||||
if llm_configured:
|
||||
try:
|
||||
from changedetectionio.llm.evaluator import get_effective_summary_prompt
|
||||
_prompt = get_effective_summary_prompt(watch, datastore)
|
||||
llm_summary_prompt = _prompt
|
||||
llm_diff_summary = watch.get_llm_diff_summary(from_version, to_version, prompt=_prompt)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not load llm-diff-summary for {uuid}: {e}")
|
||||
|
||||
output = render_template("diff.html",
|
||||
#initial_scroll_line_number=100,
|
||||
bottom_horizontal_offscreen_contents=offscreen_content,
|
||||
@@ -224,9 +209,9 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
||||
current_diff_url=watch['url'],
|
||||
diff_cell_grid=diff_cell_grid,
|
||||
diff_prefs=diff_prefs,
|
||||
extra_classes=' '.join(filter(None, ['difference-page', 'llm-configured' if llm_configured else ''])),
|
||||
extra_classes='difference-page',
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - {watch.label} - {gettext('History')}",
|
||||
extra_title=f" - {watch.label} - History",
|
||||
extract_form=extract_form,
|
||||
from_version=str(from_version),
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
@@ -243,9 +228,5 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
||||
uuid=uuid,
|
||||
versions=dates, # All except current/last
|
||||
watch_a=watch,
|
||||
llm_configured=llm_configured,
|
||||
llm_diff_summary=llm_diff_summary,
|
||||
llm_summary_prompt=llm_summary_prompt,
|
||||
viewing_latest=viewing_latest,
|
||||
)
|
||||
return output
|
||||
|
||||
@@ -85,10 +85,6 @@ class FilterConfig:
|
||||
self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
|
||||
return self._subtractive_selectors_cache
|
||||
|
||||
@property
|
||||
def extract_lines_containing(self):
|
||||
return self._get_merged_rules('extract_lines_containing')
|
||||
|
||||
@property
|
||||
def extract_text(self):
|
||||
return self._get_merged_rules('extract_text')
|
||||
@@ -105,30 +101,6 @@ class FilterConfig:
|
||||
def text_should_not_be_present(self):
|
||||
return self._get_merged_rules('text_should_not_be_present')
|
||||
|
||||
def get_filter_config_hash(self):
|
||||
"""
|
||||
Stable hash of the effective filter configuration.
|
||||
|
||||
Used by the skip-logic in run_changedetection() so that any change to
|
||||
global settings, tag overrides, or watch filters automatically invalidates
|
||||
the raw-content-unchanged shortcut — without needing scattered
|
||||
clear_all_last_checksums() calls at every settings mutation site.
|
||||
"""
|
||||
app = self.datastore.data['settings']['application']
|
||||
config = {
|
||||
'extract_lines_containing': sorted(self.extract_lines_containing),
|
||||
'extract_text': sorted(self.extract_text),
|
||||
'ignore_text': sorted(self.ignore_text),
|
||||
'include_filters': sorted(self.include_filters),
|
||||
'subtractive_selectors': sorted(self.subtractive_selectors),
|
||||
'text_should_not_be_present': sorted(self.text_should_not_be_present),
|
||||
'trigger_text': sorted(self.trigger_text),
|
||||
# Global processing flags not captured by the filter lists above
|
||||
'ignore_whitespace': app.get('ignore_whitespace', False),
|
||||
'strip_ignored_lines': app.get('strip_ignored_lines', False),
|
||||
}
|
||||
return hashlib.md5(json.dumps(config, sort_keys=True).encode()).hexdigest()
|
||||
|
||||
@property
|
||||
def has_include_filters(self):
|
||||
return bool(self.include_filters) and bool(self.include_filters[0].strip())
|
||||
@@ -163,17 +135,6 @@ class ContentTransformer:
|
||||
text = text.replace("\n\n", "\n")
|
||||
return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
|
||||
|
||||
@staticmethod
|
||||
def extract_lines_containing(text, substrings):
|
||||
"""Keep only lines that contain at least one of the given substrings (case-insensitive)."""
|
||||
needles = [s.lower() for s in substrings if s.strip()]
|
||||
if not needles:
|
||||
return text
|
||||
return '\n'.join(
|
||||
line for line in text.splitlines()
|
||||
if any(needle in line.lower() for needle in needles)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def extract_by_regex(text, regex_patterns):
|
||||
"""Extract text matching regex patterns."""
|
||||
@@ -386,7 +347,6 @@ class ContentProcessor:
|
||||
def extract_text_from_html(self, html_content, stream_content_type):
|
||||
"""Convert HTML to plain text."""
|
||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||
|
||||
return html_tools.html_to_text(
|
||||
html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
@@ -416,26 +376,19 @@ class perform_site_check(difference_detection_processor):
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
current_raw_document_checksum = self.get_raw_document_checksum()
|
||||
|
||||
# Build filter config up front so we can hash it for the skip check.
|
||||
filter_config = FilterConfig(watch, self.datastore)
|
||||
current_filter_config_hash = filter_config.get_filter_config_hash()
|
||||
|
||||
# Skip only when ALL of these hold:
|
||||
# 1. raw HTML is unchanged
|
||||
# 2. watch config was not edited (was_edited covers per-watch field changes)
|
||||
# 3. effective filter config is unchanged (covers global/tag setting changes that
|
||||
# bypass was_edited — e.g. global_ignore_text, global_subtractive_selectors)
|
||||
# last_filter_config_hash being False means first run or upgrade: don't skip.
|
||||
# Skip processing only if BOTH conditions are true:
|
||||
# 1. HTML content unchanged (checksum matches last saved checksum)
|
||||
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
|
||||
# The was_edited flag handles all watch configuration changes, so we don't need
|
||||
# separate checks for trigger_text or other processing rules.
|
||||
if (not force_reprocess and
|
||||
not watch.was_edited and
|
||||
self.last_raw_content_checksum and
|
||||
self.last_raw_content_checksum == current_raw_document_checksum and
|
||||
watch.get('last_filter_config_hash') and
|
||||
watch.get('last_filter_config_hash') == current_filter_config_hash):
|
||||
self.last_raw_content_checksum == current_raw_document_checksum):
|
||||
raise checksumFromPreviousCheckWasTheSame()
|
||||
|
||||
# Initialize remaining components
|
||||
# Initialize components
|
||||
filter_config = FilterConfig(watch, self.datastore)
|
||||
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
|
||||
transformer = ContentTransformer()
|
||||
rule_engine = RuleEngine()
|
||||
@@ -456,7 +409,6 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
# Save the raw content checksum to file (processor implementation detail, not watch config)
|
||||
self.update_last_raw_content_checksum(current_raw_document_checksum)
|
||||
update_obj['last_filter_config_hash'] = current_filter_config_hash
|
||||
|
||||
# === CONTENT PREPROCESSING ===
|
||||
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
|
||||
@@ -550,10 +502,6 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||
|
||||
# === LINE FILTER (plain-text substring) ===
|
||||
if filter_config.extract_lines_containing:
|
||||
stripped_text = transformer.extract_lines_containing(stripped_text, filter_config.extract_lines_containing)
|
||||
|
||||
# === REGEX EXTRACTION ===
|
||||
if filter_config.extract_text:
|
||||
extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
|
||||
@@ -587,8 +535,8 @@ class perform_site_check(difference_detection_processor):
|
||||
# === BLOCKING RULES EVALUATION ===
|
||||
blocked = False
|
||||
|
||||
# Check trigger_text - use text_for_checksuming so ignore_text can suppress trigger_text
|
||||
if rule_engine.evaluate_trigger_text(text_for_checksuming, filter_config.trigger_text):
|
||||
# Check trigger_text
|
||||
if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
|
||||
blocked = True
|
||||
|
||||
# Check text_should_not_be_present
|
||||
|
||||
@@ -29,11 +29,9 @@ def register_watch_operation_handlers(socketio, datastore):
|
||||
# Perform the operation
|
||||
if op == 'pause':
|
||||
watch.toggle_pause()
|
||||
watch.commit()
|
||||
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
||||
elif op == 'mute':
|
||||
watch.toggle_mute()
|
||||
watch.commit()
|
||||
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
||||
elif op == 'recheck':
|
||||
# Import here to avoid circular imports
|
||||
|
||||
@@ -199,31 +199,8 @@ def handle_watch_update(socketio, **kwargs):
|
||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||
|
||||
|
||||
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
|
||||
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
|
||||
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
|
||||
The exception is caught inside run_wsgi and routed to self.server.log() — it never
|
||||
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
|
||||
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
|
||||
disconnect correctly via its own disconnect handler and timeout logic."""
|
||||
try:
|
||||
from werkzeug.serving import BaseWSGIServer
|
||||
_original_log = BaseWSGIServer.log
|
||||
|
||||
def _filtered_log(self, type, message, *args):
|
||||
if type == 'error' and 'write() before start_response' in message:
|
||||
return
|
||||
_original_log(self, type, message, *args)
|
||||
|
||||
BaseWSGIServer.log = _filtered_log
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def init_socketio(app, datastore):
|
||||
"""Initialize SocketIO with the main Flask app"""
|
||||
_suppress_werkzeug_ws_abrupt_disconnect_noise()
|
||||
|
||||
import platform
|
||||
import sys
|
||||
|
||||
@@ -368,4 +345,4 @@ def init_socketio(app, datastore):
|
||||
|
||||
logger.info("Socket.IO initialized and attached to main Flask app")
|
||||
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
|
||||
return socketio
|
||||
return socketio
|
||||
@@ -44,12 +44,12 @@ data_sanity_test () {
|
||||
cd ..
|
||||
TMPDIR=$(mktemp -d)
|
||||
PORT_N=$((5000 + RANDOM % (6501 - 5000)))
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
|
||||
./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
|
||||
PID=$!
|
||||
sleep 5
|
||||
kill $PID
|
||||
sleep 2
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR &
|
||||
./changedetection.py -p $PORT_N -d $TMPDIR &
|
||||
PID=$!
|
||||
sleep 5
|
||||
# On a restart the URL should still be there
|
||||
|
||||
@@ -1,20 +1,5 @@
|
||||
function checkDiscordHtmlWarning() {
|
||||
var urls = $('textarea.notification-urls').val() || '';
|
||||
var format = $('select.notification-format').val() || '';
|
||||
var isDiscord = /discord:\/\/|https:\/\/discord(?:app)?\.com\/api/i.test(urls);
|
||||
var isHtml = format === 'html' || format === 'htmlcolor';
|
||||
if (isDiscord && isHtml) {
|
||||
$('#discord-html-format-warning').show();
|
||||
} else {
|
||||
$('#discord-html-format-warning').hide();
|
||||
}
|
||||
}
|
||||
|
||||
$(document).ready(function () {
|
||||
|
||||
$('textarea.notification-urls, select.notification-format').on('change input', checkDiscordHtmlWarning);
|
||||
checkDiscordHtmlWarning();
|
||||
|
||||
$('#add-email-helper').click(function (e) {
|
||||
e.preventDefault();
|
||||
email = prompt("Destination email");
|
||||
|
||||
@@ -116,14 +116,6 @@ $(document).ready(function () {
|
||||
$('#realtime-conn-error').show();
|
||||
});
|
||||
|
||||
// Tell the server we're leaving cleanly so it can release the connection
|
||||
// immediately rather than waiting for a timeout.
|
||||
// Note: this only fires for voluntary closes (tab/window close, navigation away).
|
||||
// Hard kills, crashes and network drops will still timeout normally on the server.
|
||||
window.addEventListener('beforeunload', function () {
|
||||
socket.disconnect();
|
||||
});
|
||||
|
||||
socket.on('queue_size', function (data) {
|
||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
||||
if(queueSizePagerInfoText) {
|
||||
|
||||
@@ -102,9 +102,7 @@
|
||||
}
|
||||
|
||||
// Navigate to search results (always redirect to watchlist home)
|
||||
// Use base_path if available (for sub-path deployments like /enlighten-richerx)
|
||||
const basePath = typeof base_path !== 'undefined' ? base_path : '';
|
||||
window.location.href = basePath + '/?' + params.toString();
|
||||
window.location.href = '/?' + params.toString();
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
/**
|
||||
* sub-tabs.js — Vertical sub-tab switcher.
|
||||
*
|
||||
* Finds every .stab-shell on the page and wires up tab switching.
|
||||
* The shell needs an id= attribute for localStorage persistence.
|
||||
*
|
||||
* HTML contract (generated by _stab.html macros):
|
||||
* .stab-shell#some-id
|
||||
* .stab-nav
|
||||
* button.stab-btn[data-stab="foo"]
|
||||
* .stab-body
|
||||
* .stab-pane[data-stab="foo"]
|
||||
*
|
||||
* Any element inside the shell with data-stab-goto="tab-id" triggers
|
||||
* navigation to that pane when clicked (for CTA buttons etc.).
|
||||
*/
|
||||
|
||||
(function () {
|
||||
'use strict';
|
||||
|
||||
function initShell(shell) {
|
||||
var shellId = shell.id;
|
||||
var storageKey = shellId ? 'stab:' + shellId : null;
|
||||
|
||||
var btns = Array.prototype.slice.call(shell.querySelectorAll('.stab-nav .stab-btn'));
|
||||
var panes = Array.prototype.slice.call(shell.querySelectorAll('.stab-body .stab-pane'));
|
||||
|
||||
if (!btns.length || !panes.length) return;
|
||||
|
||||
var validIds = btns.map(function (b) { return b.dataset.stab; });
|
||||
|
||||
function activate(tabId) {
|
||||
if (validIds.indexOf(tabId) === -1) return;
|
||||
|
||||
btns.forEach(function (b) {
|
||||
b.classList.toggle('active', b.dataset.stab === tabId);
|
||||
});
|
||||
panes.forEach(function (p) {
|
||||
p.classList.toggle('active', p.dataset.stab === tabId);
|
||||
});
|
||||
|
||||
if (storageKey) {
|
||||
try { localStorage.setItem(storageKey, tabId); } catch (e) {}
|
||||
}
|
||||
}
|
||||
|
||||
// Nav button clicks
|
||||
btns.forEach(function (btn) {
|
||||
btn.addEventListener('click', function () { activate(btn.dataset.stab); });
|
||||
});
|
||||
|
||||
// data-stab-goto navigation from CTA buttons anywhere inside the shell
|
||||
shell.addEventListener('click', function (e) {
|
||||
var el = e.target.closest('[data-stab-goto]');
|
||||
if (el && shell.contains(el)) {
|
||||
e.preventDefault();
|
||||
activate(el.dataset.stabGoto);
|
||||
}
|
||||
});
|
||||
|
||||
// Restore persisted tab or fall back to first tab
|
||||
var stored = null;
|
||||
if (storageKey) {
|
||||
try { stored = localStorage.getItem(storageKey); } catch (e) {}
|
||||
}
|
||||
activate(stored && validIds.indexOf(stored) !== -1 ? stored : validIds[0]);
|
||||
}
|
||||
|
||||
function initAll() {
|
||||
var shells = document.querySelectorAll('.stab-shell');
|
||||
shells.forEach(function (shell) { initShell(shell); });
|
||||
}
|
||||
|
||||
if (document.readyState === 'loading') {
|
||||
document.addEventListener('DOMContentLoaded', initAll);
|
||||
} else {
|
||||
initAll();
|
||||
}
|
||||
}());
|
||||
@@ -10,27 +10,6 @@ $(document).ready(function () {
|
||||
setCookieValue(!isDark);
|
||||
});
|
||||
|
||||
// AI mode toggle — persisted in localStorage
|
||||
(function initAiMode() {
|
||||
const enabled = localStorage.getItem('ai-mode') === 'true';
|
||||
$("html").attr("data-ai-mode", enabled ? "true" : "false");
|
||||
})();
|
||||
|
||||
$(".toggle-ai-mode").on("click", function () {
|
||||
if ($(this).data("llm-configured") !== true && $(this).data("llm-configured") !== "true") {
|
||||
document.getElementById("llm-not-configured-modal").showModal();
|
||||
return;
|
||||
}
|
||||
const current = $("html").attr("data-ai-mode") === "true";
|
||||
const next = !current;
|
||||
$("html").attr("data-ai-mode", next ? "true" : "false");
|
||||
localStorage.setItem('ai-mode', next ? 'true' : 'false');
|
||||
});
|
||||
|
||||
$("#close-llm-not-configured-modal").on("click", function () {
|
||||
document.getElementById("llm-not-configured-modal").close();
|
||||
});
|
||||
|
||||
const setCookieValue = (value) => {
|
||||
document.cookie = `css_dark_mode=${value};max-age=31536000;path=/`
|
||||
}
|
||||
|
||||
@@ -107,118 +107,5 @@ $(function () {
|
||||
|
||||
nowtimeserver = nowtimeserver + time_check_step_size_seconds;
|
||||
}, time_check_step_size_seconds * 1000);
|
||||
|
||||
// LLM / AI features — only active when the server has LLM configured
|
||||
if ($('body').hasClass('llm-configured')) {
|
||||
var i18n = window.watchOverviewI18n || {};
|
||||
var msgGenerating = i18n.generatingSummary || 'Generating summary…';
|
||||
var msgHistory = i18n.gotoHistory || 'Goto full history';
|
||||
|
||||
// Reveal intent textarea on first keydown in the quick-add URL field
|
||||
var $intentWrap = $('#quick-watch-llm-intent');
|
||||
if ($intentWrap.length) {
|
||||
$('#new-watch-form input[name="url"]').one('keydown', function () {
|
||||
$intentWrap.slideDown(200);
|
||||
});
|
||||
}
|
||||
|
||||
// Inline AI summary — clicking the Summary button inserts a row below with AJAX content
|
||||
$(document).on('click', '.ai-history-btn', function (e) {
|
||||
if ($('html').attr('data-ai-mode') !== 'true') return; // normal navigation when AI mode is off
|
||||
|
||||
e.preventDefault();
|
||||
|
||||
var $btn = $(this);
|
||||
var uuid = $btn.data('uuid');
|
||||
var url = $btn.data('summary-url');
|
||||
var $row = $btn.closest('tr');
|
||||
var rowId = 'ai-summary-row-' + uuid;
|
||||
var cols = $row.find('td').length;
|
||||
var $tbody = $row.closest('tbody');
|
||||
|
||||
// Toggle: remove existing row if already open
|
||||
if ($('#' + rowId).length) {
|
||||
$('#' + rowId).remove();
|
||||
$tbody.find('tr:not(.ai-inline-summary-row) td').css('background-color', '');
|
||||
return;
|
||||
}
|
||||
|
||||
// Snapshot row backgrounds BEFORE DOM mutation — inserting a <tr> shifts nth-child parity
|
||||
var $dataRows = $tbody.find('tr:not(.ai-inline-summary-row)');
|
||||
var bgMap = [];
|
||||
$dataRows.each(function () {
|
||||
bgMap.push($(this).find('td:first').css('background-color'));
|
||||
});
|
||||
|
||||
var $summaryRow = $(
|
||||
'<tr class="ai-inline-summary-row" id="' + rowId + '">' +
|
||||
'<td colspan="' + cols + '">' +
|
||||
'<div class="ai-inline-summary-content">' +
|
||||
'<span class="ai-inline-spinner">✨</span>' +
|
||||
'<div class="ai-inline-body">' +
|
||||
'<span class="ai-inline-text">' + $('<span>').text(msgGenerating).html() + '</span>' +
|
||||
'</div>' +
|
||||
'</div>' +
|
||||
'</td></tr>'
|
||||
);
|
||||
$row.after($summaryRow);
|
||||
|
||||
// Re-apply frozen backgrounds so the nth-child parity shift is invisible
|
||||
$dataRows.each(function (i) {
|
||||
$(this).find('td').css('background-color', bgMap[i]);
|
||||
});
|
||||
|
||||
function formatSummary(text) {
|
||||
var sectionRe = /^(Added|Changed|Removed|Updated|New|Deleted)\s*:/i;
|
||||
return text.split('\n').map(function (line) {
|
||||
var safe = $('<span>').text(line).html();
|
||||
return sectionRe.test(line.trim())
|
||||
? safe.replace(/^(\w[\w\s]*)(\s*:)/i, '<strong>$1$2</strong>')
|
||||
: safe;
|
||||
}).join('<br>');
|
||||
}
|
||||
|
||||
var promptUrl = url + '/prompt';
|
||||
|
||||
// Fire both requests simultaneously — prompt returns immediately, summary after LLM
|
||||
$.getJSON(promptUrl)
|
||||
.done(function (data) {
|
||||
if (data.prompt && $summaryRow.find('.ai-inline-summary-content:not(.loaded)').length) {
|
||||
$summaryRow.find('.ai-inline-body').append(
|
||||
'<span class="ai-inline-prompt">' + $('<span>').text(data.prompt).html() + '</span>'
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
$.getJSON(url)
|
||||
.done(function (data) {
|
||||
var $content = $summaryRow.find('.ai-inline-summary-content');
|
||||
var historyUrl = $btn.attr('href');
|
||||
if (data.summary) {
|
||||
$content.addClass('loaded');
|
||||
$content.find('.ai-inline-text').html(formatSummary(data.summary));
|
||||
$content.find('.ai-inline-prompt').remove();
|
||||
$content.find('.ai-inline-body').append(
|
||||
'<a href="' + historyUrl + '" class="ai-inline-history-link">' +
|
||||
$('<span>').text(msgHistory).html() + '</a>'
|
||||
);
|
||||
} else if (data.error) {
|
||||
$summaryRow.find('td').html(
|
||||
'<span class="ai-inline-error">' + $('<span>').text(data.error).html() + '</span>'
|
||||
);
|
||||
} else {
|
||||
$summaryRow.remove();
|
||||
}
|
||||
})
|
||||
.fail(function (xhr) {
|
||||
var msg = (xhr.responseJSON && xhr.responseJSON.error)
|
||||
? xhr.responseJSON.error
|
||||
: 'AI summary request failed (HTTP ' + xhr.status + ').';
|
||||
$summaryRow.find('td').html(
|
||||
'<span class="ai-inline-error">' + $('<span>').text(msg).html() + '</span>'
|
||||
);
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -12,14 +12,7 @@ function request_textpreview_update() {
|
||||
data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
|
||||
});
|
||||
|
||||
// llm_intent lives in a separate (potentially hidden) tab — include it explicitly
|
||||
const $llmIntent = $('textarea[name="llm_intent"]');
|
||||
if ($llmIntent.length) {
|
||||
data['llm_intent'] = $llmIntent.val();
|
||||
}
|
||||
|
||||
$('body').toggleClass('spinner-active', 1);
|
||||
$('#llm-preview-result').hide();
|
||||
|
||||
$.abortiveSingularAjax({
|
||||
type: "POST",
|
||||
@@ -48,21 +41,6 @@ function request_textpreview_update() {
|
||||
'title': "No change-detection will occur because this text exists."
|
||||
}
|
||||
])
|
||||
|
||||
// LLM preview extraction result
|
||||
const $llmResult = $('#llm-preview-result');
|
||||
if ($llmResult.length && data['llm_evaluation']) {
|
||||
const ev = data['llm_evaluation'];
|
||||
const found = ev['found'];
|
||||
$llmResult.attr('data-found', found ? '1' : '0');
|
||||
$llmResult.find('.llm-preview-verdict').text(
|
||||
found ? '✓ Would trigger a change' : '✗ Would not trigger a change'
|
||||
);
|
||||
$llmResult.find('.llm-preview-answer').text(ev['answer'] || '');
|
||||
$llmResult.show();
|
||||
} else if ($llmResult.length) {
|
||||
$llmResult.hide();
|
||||
}
|
||||
}).fail(function (error) {
|
||||
if (error.statusText === 'abort') {
|
||||
console.log('Request was aborted due to a new request being fired.');
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user