mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-07-02 13:31:18 +00:00
Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 62e1259750 | |||
| 380d8a26a1 | |||
| 02c03fc32b | |||
| db3d38b3ee | |||
| ecd8af94f6 | |||
| e400e463a4 | |||
| 9d355b8f05 | |||
| da43a17541 | |||
| 904eaaaaf7 | |||
| 1e12ae404f | |||
| ec7d56f85d | |||
| 417d57e574 | |||
| 1d7d812eb0 | |||
| 524393a1fb | |||
| b09ebcbef6 | |||
| 30ac10ff24 | |||
| b984426666 | |||
| 1889a10ef6 | |||
| f66ae4fceb | |||
| fb14229888 | |||
| 6d1081f5bc | |||
| 9e907d8466 | |||
| 6d6a0fd7ef | |||
| 1537e58fc2 | |||
| 5669509255 | |||
| 1d72716c69 |
@@ -66,27 +66,27 @@ jobs:
|
||||
echo ${{ github.ref }} > changedetectionio/tag.txt
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
uses: docker/setup-qemu-action@v4
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@v4
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Docker Hub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@v4
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v5
|
||||
uses: docker/metadata-action@v6
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
@@ -107,7 +107,7 @@ jobs:
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -131,7 +131,7 @@ jobs:
|
||||
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v5
|
||||
uses: docker/metadata-action@v6
|
||||
id: meta
|
||||
with:
|
||||
images: |
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
|
||||
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
@@ -60,14 +60,14 @@ jobs:
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
uses: docker/setup-qemu-action@v4
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@v4
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
|
||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v7
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
|
||||
@@ -52,4 +52,13 @@ jobs:
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.13'
|
||||
skip-pypuppeteer: true
|
||||
skip-pypuppeteer: true
|
||||
|
||||
|
||||
test-application-3-14:
|
||||
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.14'
|
||||
skip-pypuppeteer: false
|
||||
|
||||
@@ -42,10 +42,10 @@ jobs:
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@v4
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/test-changedetectionio.tar
|
||||
@@ -88,7 +88,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -116,7 +116,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -165,14 +165,14 @@ jobs:
|
||||
|
||||
- name: Store test artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Store CLI test output
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||
path: cli-opts-output.txt
|
||||
@@ -188,7 +188,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -230,7 +230,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -270,7 +270,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -306,7 +306,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -334,7 +334,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -504,7 +504,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -544,7 +544,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -574,7 +574,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -598,7 +598,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -643,7 +643,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -706,7 +706,19 @@ jobs:
|
||||
- name: Check upgrade works without error
|
||||
run: |
|
||||
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
||||
|
||||
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
|
||||
g++ \
|
||||
gcc \
|
||||
libc-dev \
|
||||
libffi-dev \
|
||||
libjpeg-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
make \
|
||||
patch \
|
||||
pkg-config \
|
||||
zlib1g-dev
|
||||
|
||||
# Checkout old version and create datastore
|
||||
git checkout 0.49.1
|
||||
python3 -m venv .venv
|
||||
@@ -820,7 +832,7 @@ jobs:
|
||||
|
||||
- name: Upload upgrade test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/upgrade-test.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.1'
|
||||
__version__ = '0.54.4'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
@@ -61,8 +61,22 @@ import time
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||
try:
|
||||
import ctypes as _ctypes
|
||||
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
import functools
|
||||
from flask import make_response
|
||||
from flask_restful import Resource
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _get_spec_yaml():
|
||||
"""Build and cache the merged spec as a YAML string (only serialized once per process)."""
|
||||
import yaml
|
||||
from changedetectionio.api import build_merged_spec_dict
|
||||
return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
|
||||
|
||||
|
||||
class Spec(Resource):
|
||||
def get(self):
|
||||
"""Return the merged OpenAPI spec including all registered processor extensions."""
|
||||
return make_response(
|
||||
_get_spec_yaml(),
|
||||
200,
|
||||
{'Content-Type': 'application/yaml'}
|
||||
)
|
||||
@@ -177,6 +177,13 @@ class Tag(Resource):
|
||||
|
||||
new_uuid = self.datastore.add_tag(title=title)
|
||||
if new_uuid:
|
||||
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
||||
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
||||
if extra:
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
||||
if tag:
|
||||
tag.update(extra)
|
||||
tag.commit()
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported tag", 400
|
||||
|
||||
@@ -3,29 +3,18 @@ from flask import request, abort
|
||||
from loguru import logger
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
import os
|
||||
import yaml # Lazy import - only loaded when API validation is actually used
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
# Possibly for pip3 packages
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
||||
return _openapi_spec
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
def build_merged_spec_dict():
|
||||
"""
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
|
||||
|
||||
Used by Import endpoint to validate and convert query parameters.
|
||||
Returns the YAML dict directly (not the OpenAPI object).
|
||||
Each processor can provide an api.yaml file alongside its __init__.py that defines
|
||||
additional schemas (e.g., processor_config_restock_diff). These are merged into
|
||||
WatchBase.properties so the spec accurately reflects what the API accepts.
|
||||
|
||||
Plugin processors (via pluggy) are also supported - they just need an api.yaml
|
||||
next to their processor module.
|
||||
|
||||
Returns the merged dict (cached - do not mutate the returned value).
|
||||
"""
|
||||
import os
|
||||
import yaml
|
||||
@@ -35,7 +24,59 @@ def get_openapi_schema_dict():
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
return yaml.safe_load(f)
|
||||
spec_dict = yaml.safe_load(f)
|
||||
|
||||
try:
|
||||
from changedetectionio.processors import find_processors, get_parent_module
|
||||
for module, proc_name in find_processors():
|
||||
parent = get_parent_module(module)
|
||||
if not parent or not hasattr(parent, '__file__'):
|
||||
continue
|
||||
api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
|
||||
if not os.path.exists(api_yaml_path):
|
||||
continue
|
||||
with open(api_yaml_path, 'r', encoding='utf-8') as f:
|
||||
proc_spec = yaml.safe_load(f)
|
||||
# Merge schemas
|
||||
proc_schemas = proc_spec.get('components', {}).get('schemas', {})
|
||||
spec_dict['components']['schemas'].update(proc_schemas)
|
||||
# Inject processor_config_{name} into WatchBase if the schema is defined
|
||||
schema_key = f'processor_config_{proc_name}'
|
||||
if schema_key in proc_schemas:
|
||||
spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
|
||||
'$ref': f'#/components/schemas/{schema_key}'
|
||||
}
|
||||
# Append x-code-samples from processor paths into existing path operations
|
||||
for path, path_item in proc_spec.get('paths', {}).items():
|
||||
if path not in spec_dict.get('paths', {}):
|
||||
continue
|
||||
for method, operation in path_item.items():
|
||||
if method not in spec_dict['paths'][path]:
|
||||
continue
|
||||
if 'x-code-samples' in operation:
|
||||
existing = spec_dict['paths'][path][method].get('x-code-samples', [])
|
||||
spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to merge processor API specs: {e}")
|
||||
|
||||
return spec_dict
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
return OpenAPI.from_dict(build_merged_spec_dict())
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
|
||||
Used by Import endpoint to validate and convert query parameters.
|
||||
Returns the merged YAML dict (not the OpenAPI object).
|
||||
"""
|
||||
return build_merged_spec_dict()
|
||||
|
||||
@functools.cache
|
||||
def _resolve_schema_properties(schema_name):
|
||||
@@ -150,5 +191,6 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
|
||||
from .Tags import Tags, Tag
|
||||
from .Import import Import
|
||||
from .SystemInfo import SystemInfo
|
||||
from .Spec import Spec
|
||||
from .Notifications import Notifications
|
||||
|
||||
|
||||
@@ -40,11 +40,6 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add the flask app secret (if it exists)
|
||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||
if os.path.isfile(secret_file):
|
||||
zipObj.write(secret_file, arcname="secret.txt")
|
||||
|
||||
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||
for uuid, tag in (tags or {}).items():
|
||||
for f in Path(tag.data_dir).glob('*'):
|
||||
@@ -151,19 +146,22 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def download_backup(filename):
|
||||
import re
|
||||
filename = filename.strip()
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
||||
abort(404)
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
||||
|
||||
# Resolve 'latest' before any validation so checks run against the real filename.
|
||||
if filename == 'latest':
|
||||
backups = find_backups()
|
||||
if not backups:
|
||||
abort(404)
|
||||
filename = backups[0]['filename']
|
||||
|
||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
||||
abort(404)
|
||||
|
||||
logger.debug(f"Backup download request for '{full_path}'")
|
||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
import threading
|
||||
@@ -14,6 +15,16 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
|
||||
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
|
||||
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
|
||||
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
|
||||
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
|
||||
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
|
||||
_UUID_RE = re.compile(
|
||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
class RestoreForm(Form):
|
||||
zip_file = FileField(_l('Backup zip file'), validators=[
|
||||
@@ -50,7 +61,18 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
logger.debug(f"Restore: extracting zip to {tmpdir}")
|
||||
with zipfile.ZipFile(zip_stream, 'r') as zf:
|
||||
zf.extractall(tmpdir)
|
||||
total_uncompressed = sum(m.file_size for m in zf.infolist())
|
||||
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
|
||||
raise ValueError(
|
||||
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
|
||||
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
|
||||
)
|
||||
resolved_dest = os.path.realpath(tmpdir)
|
||||
for member in zf.infolist():
|
||||
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
|
||||
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
|
||||
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
|
||||
zf.extract(member, tmpdir)
|
||||
logger.debug("Restore: zip extracted, scanning UUID directories")
|
||||
|
||||
for entry in os.scandir(tmpdir):
|
||||
@@ -58,6 +80,9 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
|
||||
continue
|
||||
|
||||
uuid = entry.name
|
||||
if not _UUID_RE.match(uuid):
|
||||
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
|
||||
continue
|
||||
tag_json_path = os.path.join(entry.path, 'tag.json')
|
||||
watch_json_path = os.path.join(entry.path, 'watch.json')
|
||||
|
||||
@@ -155,7 +180,9 @@ def construct_restore_blueprint(datastore):
|
||||
form = RestoreForm()
|
||||
return render_template("backup_restore.html",
|
||||
form=form,
|
||||
restore_running=any(t.is_alive() for t in restore_threads))
|
||||
restore_running=any(t.is_alive() for t in restore_threads),
|
||||
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
|
||||
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
|
||||
|
||||
@login_optionally_required
|
||||
@restore_blueprint.route("/restore/start", methods=['POST'])
|
||||
@@ -173,10 +200,22 @@ def construct_restore_blueprint(datastore):
|
||||
flash(gettext("File must be a .zip backup file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Read into memory now — the request stream is gone once we return
|
||||
# Reject oversized uploads before reading the stream into memory.
|
||||
content_length = request.content_length
|
||||
if content_length and content_length > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Read into memory now — the request stream is gone once we return.
|
||||
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
|
||||
try:
|
||||
zip_bytes = io.BytesIO(zip_file.read())
|
||||
zipfile.ZipFile(zip_bytes) # quick validity check before spawning
|
||||
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
|
||||
if len(raw) > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
zip_bytes = io.BytesIO(raw)
|
||||
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
|
||||
pass
|
||||
zip_bytes.seek(0)
|
||||
except zipfile.BadZipFile:
|
||||
flash(gettext("Invalid or corrupted zip file"), "error")
|
||||
@@ -201,6 +240,7 @@ def construct_restore_blueprint(datastore):
|
||||
name="BackupRestore"
|
||||
)
|
||||
restore_thread.start()
|
||||
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
|
||||
restore_threads.append(restore_thread)
|
||||
flash(gettext("Restore started in background, check back in a few minutes."))
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
@@ -19,6 +19,10 @@
|
||||
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB · Max decompressed size: %(decomp)s MB',
|
||||
upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||
|
||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
||||
def rss_tag_feed(tag_uuid):
|
||||
|
||||
from flask import make_response, request, url_for
|
||||
|
||||
@@ -45,7 +45,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
extra_notification_tokens=datastore.get_unique_notification_tokens_available()
|
||||
)
|
||||
|
||||
|
||||
# Remove the last option 'System default'
|
||||
form.application.form.notification_format.choices.pop()
|
||||
|
||||
@@ -130,12 +129,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Instantiate plugin form with POST data
|
||||
plugin_form = form_class(formdata=request.form)
|
||||
|
||||
# Save plugin settings — use plugin's own save_fn if provided
|
||||
# (allows plugins to strip ephemeral staging fields etc.)
|
||||
save_fn = tab.get('save_fn')
|
||||
if save_fn:
|
||||
save_fn(datastore, plugin_form)
|
||||
elif plugin_form.data:
|
||||
# Save plugin settings (validation is optional for plugins)
|
||||
if plugin_form.data:
|
||||
save_plugin_settings(datastore.datastore_path, plugin_id, plugin_form.data)
|
||||
|
||||
flash(gettext("Settings updated."))
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
{% for tab in plugin_tabs %}
|
||||
@@ -309,7 +308,6 @@ nav
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="proxies">
|
||||
<div id="recommended-proxy">
|
||||
<div>
|
||||
|
||||
@@ -160,6 +160,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
default_system_settings = datastore.data['settings'],
|
||||
)
|
||||
|
||||
# Bridge API-stored processor_config_* values into the form's FormField sub-forms.
|
||||
# The API stores processor_config_restock_diff in the tag dict; find the matching
|
||||
# FormField by checking which one's sub-fields cover the config keys.
|
||||
from wtforms.fields.form import FormField as WTFormField
|
||||
for key, value in default.items():
|
||||
if not key.startswith('processor_config_') or not isinstance(value, dict):
|
||||
continue
|
||||
for form_field in form:
|
||||
if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
|
||||
for sub_key, sub_value in value.items():
|
||||
sub_field = form_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
break
|
||||
|
||||
template_args = {
|
||||
'data': default,
|
||||
'form': form,
|
||||
|
||||
@@ -116,11 +116,11 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool, queuedWatchMetaData, watch_check_update, llm_summary_q=None):
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool, queuedWatchMetaData, watch_check_update):
|
||||
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||
|
||||
# Register the edit blueprint
|
||||
edit_blueprint = construct_edit_blueprint(datastore, update_q, queuedWatchMetaData, llm_summary_q=llm_summary_q)
|
||||
edit_blueprint = construct_edit_blueprint(datastore, update_q, queuedWatchMetaData)
|
||||
ui_blueprint.register_blueprint(edit_blueprint)
|
||||
|
||||
# Register the notification blueprint
|
||||
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def clear_all_history():
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
confirmtext = request.form.get('confirmtext', '')
|
||||
|
||||
if confirmtext == 'clear':
|
||||
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
|
||||
@@ -11,7 +11,7 @@ from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio.time_handler import is_within_schedule
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, llm_summary_q=None):
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||
|
||||
def _watch_has_tag_options_set(watch):
|
||||
@@ -117,12 +117,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||
|
||||
if processor_config:
|
||||
from wtforms.fields.form import FormField
|
||||
# Populate processor-config-* fields from JSON
|
||||
for config_key, config_value in processor_config.items():
|
||||
field_name = f'processor_config_{config_key}'
|
||||
if hasattr(form, field_name):
|
||||
getattr(form, field_name).data = config_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
|
||||
if not isinstance(config_value, dict):
|
||||
continue
|
||||
# Try exact API-named field first (e.g., processor_config_restock_diff)
|
||||
target_field = getattr(form, f'processor_config_{config_key}', None)
|
||||
# Fallback: find any FormField sub-form whose fields cover config_value keys
|
||||
if target_field is None:
|
||||
for form_field in form:
|
||||
if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
|
||||
target_field = form_field
|
||||
break
|
||||
if target_field is not None:
|
||||
for sub_key, sub_value in config_value.items():
|
||||
sub_field = target_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load processor config: {e}")
|
||||
|
||||
@@ -404,47 +417,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
download_name=filename,
|
||||
mimetype='application/zip')
|
||||
|
||||
@edit_blueprint.route("/edit/<string:uuid>/regenerate-llm-summaries", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_regenerate_llm_summaries(uuid):
|
||||
"""Queue LLM summary generation for all history entries that don't yet have one."""
|
||||
from flask import flash
|
||||
from changedetectionio.llm.tokens import is_llm_data_ready
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404)
|
||||
|
||||
if not llm_summary_q:
|
||||
flash(gettext("LLM summarisation is not configured."), 'error')
|
||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=uuid))
|
||||
|
||||
history = watch.history
|
||||
history_keys = list(history.keys())
|
||||
|
||||
queued = 0
|
||||
# Skip the first entry — there is no prior snapshot to diff against
|
||||
for timestamp in history_keys[1:]:
|
||||
snapshot_fname = history[timestamp]
|
||||
snapshot_id = os.path.basename(snapshot_fname).split('.')[0] # always 32-char MD5
|
||||
|
||||
# Skip entries that already have a summary
|
||||
if is_llm_data_ready(watch.data_dir, snapshot_id):
|
||||
continue
|
||||
|
||||
llm_summary_q.put({
|
||||
'uuid': uuid,
|
||||
'snapshot_id': snapshot_id,
|
||||
'attempts': 0,
|
||||
})
|
||||
queued += 1
|
||||
|
||||
if queued:
|
||||
flash(gettext("Queued %(count)d LLM summaries for generation.", count=queued), 'success')
|
||||
else:
|
||||
flash(gettext("All history entries already have LLM summaries."), 'notice')
|
||||
|
||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=uuid) + '#info')
|
||||
|
||||
# Ajax callback
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])
|
||||
@login_optionally_required
|
||||
|
||||
@@ -10,7 +10,8 @@ from changedetectionio import html_tools
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET'])
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
"""
|
||||
@@ -74,7 +75,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.args.get('version')
|
||||
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
||||
|
||||
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
|
||||
@@ -489,9 +489,6 @@ Math: {{ 1 + 1 }}") }}
|
||||
<p>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">{{ _('Download latest HTML snapshot') }}</a>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_data_package', uuid=uuid)}}" class="pure-button button-small">{{ _('Download watch data package') }}</a>
|
||||
{% if watch.history_n > 1 %}
|
||||
<a href="{{url_for('ui.ui_edit.watch_regenerate_llm_summaries', uuid=uuid)}}" class="pure-button button-small">{{ _('Regenerate LLM summaries') }}</a>
|
||||
{% endif %}
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
@@ -28,6 +28,7 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
|
||||
@@ -81,6 +81,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
proxy_list = datastore.proxy_list
|
||||
output = render_template(
|
||||
"watch-overview.html",
|
||||
active_tag=active_tag,
|
||||
@@ -92,7 +93,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=datastore.proxy_list,
|
||||
has_proxies=proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
@@ -110,6 +111,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
# Return freed template-building memory to the OS immediately.
|
||||
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if session.get('share-link'):
|
||||
del (session['share-link'])
|
||||
|
||||
|
||||
@@ -213,12 +213,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- set checking_now = is_checking_now(watch) -%}
|
||||
{%- set history_n = watch.history_n -%}
|
||||
{%- set favicon = watch.get_favicon_filename() -%}
|
||||
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||
{%- set row_classes = [
|
||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||
'processor-' ~ watch['processor'],
|
||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
||||
'has-error' if error_texts|length > 2 else '',
|
||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||
'unviewed' if watch.has_unviewed else '',
|
||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||
@@ -271,7 +272,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
@@ -305,12 +306,20 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock'].get('price') -%}
|
||||
{%- if watch['restock']['price'] is number -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- else -%} <!-- watch['restock']['price']' is not a number, cant output it -->
|
||||
{%- set restock = watch['restock'] -%}
|
||||
{%- set price = restock.get('price') -%}
|
||||
{%- set cur = restock.get('currency','') -%}
|
||||
|
||||
{%- if price is not none and (price|string)|regex_search('\d') -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
|
||||
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
|
||||
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
|
||||
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
|
||||
{{ price }} {{ cur }}<!-- as string -->
|
||||
{%- endif -%}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
{%- endif -%}
|
||||
|
||||
@@ -148,10 +148,32 @@ class fetcher(Fetcher):
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# No charset in HTTP header - sniff encoding in priority order matching browsers
|
||||
# (WHATWG encoding sniffing algorithm):
|
||||
# 1. BOM - highest confidence, check before anything else
|
||||
# 2. <meta charset> in first 2kb
|
||||
# 3. chardet statistical detection - last resort
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
boms = [
|
||||
(b'\xef\xbb\xbf', 'utf-8-sig'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
|
||||
if bom_encoding:
|
||||
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
|
||||
r.encoding = bom_encoding
|
||||
else:
|
||||
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
|
||||
if meta_charset_match:
|
||||
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
|
||||
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
|
||||
r.encoding = encoding
|
||||
else:
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import flask_login
|
||||
import locale
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@@ -15,7 +16,6 @@ from changedetectionio.strtobool import strtobool
|
||||
from threading import Event
|
||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
|
||||
from changedetectionio import worker_pool
|
||||
import changedetectionio.llm as llm
|
||||
|
||||
from flask import (
|
||||
Flask,
|
||||
@@ -40,7 +40,7 @@ from loguru import logger
|
||||
|
||||
from changedetectionio import __version__
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
@@ -57,7 +57,6 @@ extra_stylesheets = []
|
||||
# Use bulletproof janus-based queues for sync/async reliability
|
||||
update_q = RecheckPriorityQueue()
|
||||
notification_q = NotificationQueue()
|
||||
llm_summary_q = llm.create_queue()
|
||||
MAX_QUEUE_SIZE = 5000
|
||||
|
||||
app = Flask(__name__,
|
||||
@@ -219,9 +218,13 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||
# Format the number with two decimal places (locale format string will return 6 decimal)
|
||||
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
||||
|
||||
return formatted_value
|
||||
|
||||
@app.template_filter('regex_search')
|
||||
def _jinja2_filter_regex_search(value, pattern):
|
||||
import re
|
||||
return re.search(pattern, str(value)) is not None
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
return worker_pool.is_watch_running(watch_obj['uuid'])
|
||||
@@ -385,6 +388,8 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
|
||||
return ''
|
||||
|
||||
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||
|
||||
@app.template_filter('sanitize_tag_class')
|
||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
"""Sanitize a tag title to create a valid CSS class name.
|
||||
@@ -396,9 +401,8 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
Returns:
|
||||
str: A sanitized string suitable for use as a CSS class name
|
||||
"""
|
||||
import re
|
||||
# Remove all non-alphanumeric characters and convert to lowercase
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
||||
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||
# Ensure it starts with a letter (CSS requirement)
|
||||
if sanitized and not sanitized[0].isalpha():
|
||||
sanitized = 'tag' + sanitized
|
||||
@@ -486,28 +490,21 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
def get_locale():
|
||||
# Locale aliases: map browser language codes to translation directory names
|
||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
||||
locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||
|
||||
def get_locale():
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
return session['locale']
|
||||
|
||||
# 2. Fall back to Accept-Language header
|
||||
# Get the best match from browser's Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
||||
|
||||
# 3. Check if we need to map the browser locale to our internal locale
|
||||
if browser_locale in locale_aliases:
|
||||
return locale_aliases[browser_locale]
|
||||
|
||||
return browser_locale
|
||||
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||
# 3. Map browser locale to our internal locale if needed
|
||||
return _locale_aliases.get(browser_locale, browser_locale)
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -596,6 +593,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Notifications, '/api/v1/notifications',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Spec, '/api/v1/full-spec')
|
||||
|
||||
@login_manager.user_loader
|
||||
def user_loader(email):
|
||||
user = User()
|
||||
@@ -877,7 +876,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# watchlist UI buttons etc
|
||||
import changedetectionio.blueprint.ui as ui
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_pool, queuedWatchMetaData, watch_check_update, llm_summary_q=llm_summary_q))
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_pool, queuedWatchMetaData, watch_check_update))
|
||||
|
||||
import changedetectionio.blueprint.watchlist as watchlist
|
||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||
@@ -1000,17 +999,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
).start()
|
||||
logger.info(f"Started {notification_workers} notification worker(s)")
|
||||
|
||||
llm.start_workers(app=app, datastore=datastore, llm_q=llm_summary_q,
|
||||
n_workers=int(os.getenv("LLM_WORKERS", "1")))
|
||||
|
||||
# Register the LLM queue plugin so changes trigger summary jobs
|
||||
from changedetectionio.llm.plugin import LLMQueuePlugin
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_manager.register(LLMQueuePlugin(llm_summary_q), 'llm_queue_plugin')
|
||||
|
||||
# Re-run template path configuration now that all plugins (including LLM) are registered
|
||||
_configure_plugin_templates()
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
@@ -1029,15 +1017,16 @@ def check_for_new_version():
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
session = requests.Session()
|
||||
session.verify = False
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
r = requests.post("https://changedetection.io/check-ver.php",
|
||||
r = session.post("https://changedetection.io/check-ver.php",
|
||||
data={'version': __version__,
|
||||
'app_guid': datastore.data['app_guid'],
|
||||
'watch_count': len(datastore.data['watching'])
|
||||
},
|
||||
|
||||
verify=False)
|
||||
})
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -1065,65 +1054,19 @@ def notification_runner(worker_id=0):
|
||||
|
||||
else:
|
||||
|
||||
# ── LLM deferred-send gate ─────────────────────────────────────────
|
||||
# If the notification was re-queued to wait for LLM data, honour the
|
||||
# scheduled retry time before doing any further processing.
|
||||
_llm_next_retry = n_object.get('_llm_next_retry_at', 0)
|
||||
if _llm_next_retry and _llm_next_retry > time.time():
|
||||
notification_q.put(n_object)
|
||||
app.config.exit.wait(min(_llm_next_retry - time.time(), 2))
|
||||
continue
|
||||
|
||||
# Apply system-config fallbacks first so we can scan the final body/title.
|
||||
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
|
||||
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
|
||||
if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
|
||||
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
|
||||
|
||||
# If the body or title references llm_* tokens, wait until LLM data is ready.
|
||||
import re as _re
|
||||
_llm_scan = (n_object.get('notification_body') or '') + ' ' + (n_object.get('notification_title') or '')
|
||||
if _re.search(r'\bllm_(?:summary|headline|importance|sentiment|one_liner)\b', _llm_scan):
|
||||
from changedetectionio.llm.tokens import (
|
||||
is_llm_data_ready, read_llm_tokens,
|
||||
LLM_NOTIFICATION_RETRY_DELAY_SECONDS, LLM_NOTIFICATION_MAX_WAIT_ATTEMPTS,
|
||||
)
|
||||
_llm_uuid = n_object.get('uuid')
|
||||
_llm_watch = datastore.data['watching'].get(_llm_uuid) if _llm_uuid else None
|
||||
_llm_snap_id = n_object.get('_llm_snapshot_id')
|
||||
|
||||
if _llm_watch and _llm_snap_id and not is_llm_data_ready(_llm_watch.data_dir, _llm_snap_id):
|
||||
_llm_attempts = n_object.get('_llm_wait_attempts', 0)
|
||||
if _llm_attempts < LLM_NOTIFICATION_MAX_WAIT_ATTEMPTS:
|
||||
n_object['_llm_wait_attempts'] = _llm_attempts + 1
|
||||
n_object['_llm_next_retry_at'] = time.time() + LLM_NOTIFICATION_RETRY_DELAY_SECONDS
|
||||
notification_q.put(n_object)
|
||||
logger.debug(
|
||||
f"Notification gate: LLM data pending for {_llm_uuid} "
|
||||
f"(attempt {n_object['_llm_wait_attempts']}/{LLM_NOTIFICATION_MAX_WAIT_ATTEMPTS})"
|
||||
)
|
||||
continue
|
||||
else:
|
||||
logger.warning(
|
||||
f"Notification: LLM data never arrived for {_llm_uuid} after "
|
||||
f"{LLM_NOTIFICATION_MAX_WAIT_ATTEMPTS} attempts — sending without LLM tokens"
|
||||
)
|
||||
elif _llm_watch and _llm_snap_id:
|
||||
# Data is ready — populate the LLM tokens into n_object
|
||||
_llm_data = read_llm_tokens(_llm_watch.data_dir, _llm_snap_id)
|
||||
n_object['llm_summary'] = _llm_data.get('summary', '')
|
||||
n_object['llm_headline'] = _llm_data.get('headline', '')
|
||||
n_object['llm_importance'] = _llm_data.get('importance')
|
||||
n_object['llm_sentiment'] = _llm_data.get('sentiment', '')
|
||||
n_object['llm_one_liner'] = _llm_data.get('one_liner', '')
|
||||
# ── end LLM gate ───────────────────────────────────────────────────
|
||||
|
||||
now = datetime.now()
|
||||
sent_obj = None
|
||||
|
||||
try:
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
|
||||
# Fallback to system config if not set
|
||||
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
|
||||
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
|
||||
|
||||
if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
|
||||
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
|
||||
|
||||
if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
|
||||
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
|
||||
if n_object.get('notification_urls', {}):
|
||||
|
||||
@@ -608,13 +608,12 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
from changedetectionio.html_tools import SafeXPath3Parser
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = line.replace('xpath:', '')
|
||||
|
||||
try:
|
||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
||||
except elementpath.ElementPathError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
|
||||
@@ -23,6 +23,53 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
|
||||
'unparsed-text',
|
||||
'unparsed-text-lines',
|
||||
'unparsed-text-available',
|
||||
'doc',
|
||||
'doc-available',
|
||||
'environment-variable',
|
||||
'available-environment-variables',
|
||||
]
|
||||
|
||||
|
||||
def _build_safe_xpath3_parser():
|
||||
"""Return an XPath3Parser subclass with filesystem/environment access functions removed.
|
||||
|
||||
XPath 3.0 includes functions that can read arbitrary files or environment variables:
|
||||
- unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
|
||||
- doc / doc-available (XML fetch from URI)
|
||||
- environment-variable / available-environment-variables (env var leakage)
|
||||
|
||||
Subclassing gives us an independent symbol_table copy (not shared with the parent class),
|
||||
so removing entries here does not affect XPath3Parser itself.
|
||||
|
||||
Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
|
||||
(comma-separated, e.g. "unparsed-text,doc,environment-variable").
|
||||
"""
|
||||
import os
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
class SafeXPath3Parser(XPath3Parser):
|
||||
pass
|
||||
|
||||
env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
|
||||
if env_override is not None:
|
||||
blocked = [f.strip() for f in env_override.split(',') if f.strip()]
|
||||
else:
|
||||
blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
|
||||
|
||||
for _fn in blocked:
|
||||
SafeXPath3Parser.symbol_table.pop(_fn, None)
|
||||
|
||||
return SafeXPath3Parser
|
||||
|
||||
|
||||
# Module-level singleton — built once, reused everywhere.
|
||||
SafeXPath3Parser = _build_safe_xpath3_parser()
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
@@ -183,8 +230,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
tree = None
|
||||
@@ -210,7 +255,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
@@ -235,6 +280,9 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
# Drop element references before the finally block so tree.clear() can release
|
||||
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
||||
del r
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
|
||||
@@ -37,6 +37,7 @@ def get_timeago_locale(flask_locale):
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
}
|
||||
@@ -67,6 +68,7 @@ LANGUAGE_DATA = {
|
||||
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
||||
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
||||
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
||||
'uk': {'flag': 'fi fi-ua fis', 'name': 'Українська'},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
"""
|
||||
changedetectionio.llm
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
LLM summary queue and workers.
|
||||
|
||||
Usage in flask_app.py
|
||||
---------------------
|
||||
|
||||
import changedetectionio.llm as llm
|
||||
|
||||
# At module level alongside notification_q:
|
||||
llm_summary_q = llm.create_queue()
|
||||
|
||||
# Inside changedetection_app(), after datastore is ready:
|
||||
llm.start_workers(
|
||||
app=app,
|
||||
datastore=datastore,
|
||||
llm_q=llm_summary_q,
|
||||
n_workers=int(os.getenv("LLM_WORKERS", "1")),
|
||||
)
|
||||
|
||||
Enqueueing a summary job (e.g. from the pluggy update_finalize hook)
|
||||
---------------------------------------------------------------------
|
||||
|
||||
if changed_detected and not processing_exception:
|
||||
llm_summary_q.put({
|
||||
'uuid': watch_uuid,
|
||||
'snapshot_id': snapshot_id,
|
||||
'attempts': 0,
|
||||
})
|
||||
"""
|
||||
|
||||
import queue
|
||||
import threading
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def create_queue() -> queue.Queue:
|
||||
"""Return a plain Queue for LLM summary jobs. No maxsize — jobs are small dicts."""
|
||||
return queue.Queue()
|
||||
|
||||
|
||||
def start_workers(app, datastore, llm_q: queue.Queue, n_workers: int = 1) -> None:
|
||||
"""
|
||||
Start N LLM summary worker threads.
|
||||
|
||||
Args:
|
||||
app: Flask application instance (for app_context and exit event)
|
||||
datastore: Application datastore
|
||||
llm_q: Queue returned by create_queue()
|
||||
n_workers: Number of parallel workers (default 1; increase for local Ollama)
|
||||
"""
|
||||
from changedetectionio.llm.queue_worker import llm_summary_runner
|
||||
|
||||
for i in range(n_workers):
|
||||
threading.Thread(
|
||||
target=llm_summary_runner,
|
||||
args=(i, app, datastore, llm_q),
|
||||
daemon=True,
|
||||
name=f"LLMSummaryWorker-{i}",
|
||||
).start()
|
||||
|
||||
logger.info(f"Started {n_workers} LLM summary worker(s)")
|
||||
@@ -1,104 +0,0 @@
|
||||
"""
|
||||
LLM plugin — provides settings tab and enqueues summary jobs on change detection.
|
||||
|
||||
Registered with the pluggy plugin manager at startup (flask_app.py).
|
||||
The worker (llm/queue_worker.py) drains the queue asynchronously.
|
||||
"""
|
||||
from loguru import logger
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
|
||||
def get_llm_settings(datastore):
    """Return the LLM plugin settings, falling back to the legacy location.

    The plugin settings (plugin id 'llm') are used whenever they contain an
    'llm_connection' entry (even an empty one). Otherwise the connections and
    summary prompt are rebuilt from datastore.data['settings']['application'],
    where older versions stored them.
    """
    from changedetectionio.pluggy_interface import load_plugin_settings

    plugin_settings = load_plugin_settings(datastore.datastore_path, 'llm')
    if plugin_settings.get('llm_connection') is not None:
        return plugin_settings

    # Legacy fallback: settings were stored in datastore application settings
    legacy = datastore.data['settings']['application']

    def _as_entry(conn_id, conn):
        # Normalise one legacy connection into the shape used by the plugin settings.
        return {
            'connection_id': conn_id,
            'name': conn.get('name', ''),
            'model': conn.get('model', ''),
            'api_key': conn.get('api_key', ''),
            'api_base': conn.get('api_base', ''),
            'tokens_per_minute': int(conn.get('tokens_per_minute', 0) or 0),
            'is_default': bool(conn.get('is_default', False)),
        }

    legacy_connections = legacy.get('llm_connections') or {}
    return {
        'llm_connection': [_as_entry(k, v) for k, v in legacy_connections.items()],
        'llm_summary_prompt': legacy.get('llm_summary_prompt', ''),
    }
|
||||
|
||||
|
||||
def save_llm_settings(datastore, plugin_form):
    """Custom save handler for the LLM settings tab.

    Only the three persisted fields are written; the ephemeral ``new_connection``
    staging fields are deliberately left out so they never reach the stored settings.

    Args:
        datastore: Application datastore (its ``datastore_path`` locates the settings).
        plugin_form: The submitted settings form.
    """
    from changedetectionio.pluggy_interface import save_plugin_settings

    # 0 is a valid choice (the form allows min=0), so only a missing value falls
    # back to the default. A plain `or 2` would silently turn 0 into 2.
    context_lines = plugin_form.llm_diff_context_lines.data
    if context_lines is None:
        context_lines = 2

    data = {
        'llm_connection': plugin_form.llm_connection.data,
        'llm_summary_prompt': plugin_form.llm_summary_prompt.data or '',
        'llm_diff_context_lines': context_lines,
    }
    save_plugin_settings(datastore.datastore_path, 'llm', data)
|
||||
|
||||
|
||||
class LLMQueuePlugin:
    """Plugin that exposes the LLM settings tab and queues summary jobs after a change."""

    def __init__(self, llm_q):
        # Queue that receives the summary jobs.
        self.llm_q = llm_q

    @hookimpl
    def plugin_settings_tab(self):
        """Describe the LLM settings tab (form, template and save handler)."""
        from changedetectionio.llm.settings_form import LLMSettingsForm

        tab = {
            'plugin_id': 'llm',
            'tab_label': 'LLM',
            'form_class': LLMSettingsForm,
            'template_path': 'settings-llm.html',
            'save_fn': save_llm_settings,
        }
        return tab

    @hookimpl
    def update_finalize(self, update_handler, watch, datastore, processing_exception,
                        changed_detected=False, snapshot_id=None):
        """Put a summary job on the queue when a change was detected without error.

        Nothing is queued when there was no change, an exception occurred, no
        snapshot id is known, the watch is missing, the watch has fewer than two
        history entries, or no LLM connection / legacy model or key is configured.
        """
        nothing_to_summarise = (not changed_detected) or processing_exception or (not snapshot_id)
        if nothing_to_summarise:
            return

        if watch is None:
            return

        # The very first history entry has no predecessor to diff against.
        if watch.history_n < 2:
            return

        # Only queue when at least one LLM connection is configured
        llm_settings = get_llm_settings(datastore)
        if not llm_settings.get('llm_connection'):
            legacy_app = datastore.data['settings']['application']
            legacy_configured = (
                legacy_app.get('llm_api_key')
                or legacy_app.get('llm_model')
                or watch.get('llm_api_key')
                or watch.get('llm_model')
            )
            if not legacy_configured:
                return

        uuid = watch.get('uuid')
        job = {'uuid': uuid, 'snapshot_id': snapshot_id, 'attempts': 0}
        self.llm_q.put(job)
        logger.debug(f"LLM: queued summary for uuid={uuid} snapshot={snapshot_id}")
|
||||
@@ -1,544 +0,0 @@
|
||||
import fcntl
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.llm.tokens import (
|
||||
STRUCTURED_OUTPUT_INSTRUCTION,
|
||||
parse_llm_response,
|
||||
write_llm_data,
|
||||
)
|
||||
|
||||
# Retry policy for failed summary jobs: a job is re-queued while its attempt
# counter is below MAX_RETRIES, waiting RETRY_BACKOFF_BASE_SECONDS * 2**attempts.
MAX_RETRIES = 5
RETRY_BACKOFF_BASE_SECONDS = 60  # 1m, 2m, 4m, 8m, 16m

# Token thresholds that control which summarisation strategy is used.
# Small diffs: single-pass summarise.
# Larger diffs: two-pass (enumerate all changes first, then compress).
# Very large diffs: map-reduce (chunk → enumerate per chunk → final synthesis).
TOKEN_SINGLE_PASS_THRESHOLD = 5000  # below this: one call
TOKEN_TWO_PASS_THRESHOLD = 15000  # below this: enumerate then summarise
TOKEN_CHUNK_SIZE = 5000  # tokens per map-reduce chunk
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Proactive token-bucket rate limiter — shared across all workers in process
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _RateLimitWait(Exception):
|
||||
"""Raised when the bucket is empty; worker re-queues without incrementing attempts."""
|
||||
def __init__(self, wait_seconds):
|
||||
self.wait_seconds = wait_seconds
|
||||
super().__init__(f"Rate limit: wait {wait_seconds:.1f}s")
|
||||
|
||||
|
||||
class _TokenBucket:
|
||||
"""Thread-safe continuous token bucket. tpm=0 means unlimited."""
|
||||
|
||||
def __init__(self, tpm):
|
||||
self._lock = threading.Lock()
|
||||
self._tpm = tpm
|
||||
self._tokens = float(tpm) # start full
|
||||
self._last_ts = time.monotonic()
|
||||
|
||||
def try_consume(self, n):
|
||||
"""Consume n tokens. Returns (True, 0.0) on success or (False, wait_secs) if dry."""
|
||||
if self._tpm == 0:
|
||||
return True, 0.0
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_ts
|
||||
self._tokens = min(self._tpm, self._tokens + elapsed * (self._tpm / 60.0))
|
||||
self._last_ts = now
|
||||
if self._tokens >= n:
|
||||
self._tokens -= n
|
||||
return True, 0.0
|
||||
deficit = n - self._tokens
|
||||
return False, deficit / (self._tpm / 60.0)
|
||||
|
||||
|
||||
# Process-wide registry of token buckets, keyed by connection id.
_rate_buckets = {}
_rate_buckets_lock = threading.Lock()


def _get_rate_bucket(conn_id, tpm):
    """Return (or lazily create) the shared _TokenBucket for this connection.

    The bucket is rebuilt when the requested tokens-per-minute differs from the
    cached bucket's limit, so a changed setting takes effect without a restart.
    A rebuilt bucket starts full.

    Args:
        conn_id: Key identifying the connection.
        tpm: Tokens-per-minute limit; falsy values mean unlimited.
    """
    limit = int(tpm or 0)
    with _rate_buckets_lock:
        bucket = _rate_buckets.get(conn_id)
        if bucket is None or bucket._tpm != limit:
            bucket = _TokenBucket(limit)
            _rate_buckets[conn_id] = bucket
        return bucket
|
||||
|
||||
|
||||
def _parse_retry_after(exc):
|
||||
"""Extract a retry-after delay (seconds) from a litellm RateLimitError."""
|
||||
if hasattr(exc, 'retry_after') and exc.retry_after:
|
||||
try:
|
||||
return float(exc.retry_after)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
m = re.search(r'(?:try again in|retry after)\s*([\d.]+)\s*s', str(exc), re.IGNORECASE)
|
||||
return float(m.group(1)) + 1.0 if m else 60.0
|
||||
|
||||
|
||||
def _read_snapshot(watch, snapshot_fname):
|
||||
"""Read a snapshot file from disk, handling plain text and brotli compression."""
|
||||
path = os.path.join(watch.data_dir, snapshot_fname)
|
||||
if snapshot_fname.endswith('.br'):
|
||||
import brotli
|
||||
with open(path, 'rb') as f:
|
||||
return brotli.decompress(f.read()).decode('utf-8', errors='replace')
|
||||
else:
|
||||
with open(path, 'r', encoding='utf-8', errors='replace') as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
def _append_llm_log(log_path, model, sent_tokens, recv_tokens, elapsed_ms):
|
||||
"""Append one line to the datastore-level LLM activity log.
|
||||
|
||||
Line format (tab-separated, LF terminated):
|
||||
ISO-8601-UTC fetched LLM via <model> sent=<N> recv=<N> ms=<N>
|
||||
|
||||
The file is flock-locked for the duration of the write so concurrent
|
||||
workers don't interleave lines.
|
||||
"""
|
||||
ts = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] # ms precision
|
||||
line = f"{ts}\tfetched LLM via {model}\tsent={sent_tokens}\trecv={recv_tokens}\tms={elapsed_ms}\n"
|
||||
try:
|
||||
with open(log_path, 'a', encoding='utf-8', newline='\n') as f:
|
||||
fcntl.flock(f, fcntl.LOCK_EX)
|
||||
try:
|
||||
f.write(line)
|
||||
f.flush()
|
||||
finally:
|
||||
fcntl.flock(f, fcntl.LOCK_UN)
|
||||
except Exception as exc:
|
||||
logger.warning(f"LLM log write failed: {exc}")
|
||||
|
||||
|
||||
def _call_llm(model, messages, api_key=None, api_base=None, max_tokens=600, conn_id=None, tpm=0, log_path=None):
|
||||
"""
|
||||
Thin wrapper around litellm.completion.
|
||||
Isolated as a named function so tests can mock.patch it without importing litellm.
|
||||
|
||||
Determinism settings
|
||||
--------------------
|
||||
temperature=0 — greedy decoding; same input produces the same output consistently.
|
||||
seed=0 — passed through to providers that support it (OpenAI, some others)
|
||||
for near-bit-identical reproducibility across calls.
|
||||
|
||||
Deliberately NOT set
|
||||
--------------------
|
||||
top_p — redundant at temperature=0 and can interact badly with some providers.
|
||||
frequency_penalty / presence_penalty — would penalise the model for repeating specific
|
||||
values (e.g. "$10 → $10") which is exactly wrong for change detection.
|
||||
|
||||
max_tokens — caller sets this based on the pass type:
|
||||
enumerate pass needs more room than the final summary pass.
|
||||
|
||||
conn_id / tpm — optional rate limiting; when both are set, a proactive token-bucket
|
||||
check is performed before calling the API. Raises _RateLimitWait if
|
||||
the bucket is empty so the worker can re-queue without retrying.
|
||||
|
||||
log_path — when set, each call is appended to the datastore LLM activity log.
|
||||
|
||||
Returns the response text string.
|
||||
"""
|
||||
import litellm
|
||||
|
||||
# Proactive rate check (skipped when tpm=0 or conn_id is None)
|
||||
if conn_id and tpm:
|
||||
prompt_tokens = litellm.token_counter(model=model, messages=messages)
|
||||
total_est = prompt_tokens + max_tokens
|
||||
bucket = _get_rate_bucket(conn_id, tpm)
|
||||
ok, wait = bucket.try_consume(total_est)
|
||||
if not ok:
|
||||
raise _RateLimitWait(wait)
|
||||
|
||||
kwargs = dict(
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
seed=0,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
if api_key:
|
||||
kwargs['api_key'] = api_key
|
||||
if api_base:
|
||||
kwargs['api_base'] = api_base
|
||||
|
||||
t0 = time.monotonic()
|
||||
response = litellm.completion(**kwargs)
|
||||
elapsed_ms = round((time.monotonic() - t0) * 1000)
|
||||
|
||||
if log_path:
|
||||
usage = getattr(response, 'usage', None)
|
||||
sent_tok = getattr(usage, 'prompt_tokens', 0) or 0
|
||||
recv_tok = getattr(usage, 'completion_tokens', 0) or 0
|
||||
_append_llm_log(log_path, model, sent_tok, recv_tok, elapsed_ms)
|
||||
|
||||
return response.choices[0].message.content.strip()
|
||||
|
||||
|
||||
|
||||
def _resolve_llm_connection(watch, datastore):
|
||||
"""Return (model, api_key, api_base, conn_id, tpm) for the given watch.
|
||||
|
||||
Resolution order:
|
||||
1. Watch-level connection_id pointing to a named entry in plugin settings.
|
||||
2. The default entry in plugin settings (is_default=True).
|
||||
3. Legacy flat fields on the watch or in global settings — backward compat.
|
||||
4. Hard-coded fallback: gpt-4o-mini with no key / base.
|
||||
"""
|
||||
from changedetectionio.llm.plugin import get_llm_settings
|
||||
from changedetectionio.llm.settings_form import sanitised_conn_id
|
||||
|
||||
llm_settings = get_llm_settings(datastore)
|
||||
connections = llm_settings.get('llm_connection') or []
|
||||
|
||||
# 1. Watch-level override by explicit connection_id
|
||||
watch_conn_id = watch.get('llm_connection_id')
|
||||
if watch_conn_id:
|
||||
for c in connections:
|
||||
if c.get('connection_id') == watch_conn_id:
|
||||
cid = sanitised_conn_id(c.get('connection_id', ''))
|
||||
return (c.get('model', 'gpt-4o-mini'), c.get('api_key', ''), c.get('api_base', ''),
|
||||
cid, int(c.get('tokens_per_minute', 0) or 0))
|
||||
|
||||
# 2. Global default connection
|
||||
for c in connections:
|
||||
if c.get('is_default'):
|
||||
cid = sanitised_conn_id(c.get('connection_id', ''))
|
||||
return (c.get('model', 'gpt-4o-mini'), c.get('api_key', ''), c.get('api_base', ''),
|
||||
cid, int(c.get('tokens_per_minute', 0) or 0))
|
||||
|
||||
# 3. Legacy flat fields (backward compat)
|
||||
app_settings = datastore.data['settings']['application']
|
||||
model = watch.get('llm_model') or app_settings.get('llm_model', 'gpt-4o-mini')
|
||||
api_key = watch.get('llm_api_key') or app_settings.get('llm_api_key', '')
|
||||
api_base = watch.get('llm_api_base') or app_settings.get('llm_api_base', '')
|
||||
return model, api_key, api_base, 'legacy', 0
|
||||
|
||||
|
||||
# System message sent with every LLM call in this module (single-pass, enumerate
# and summarise passes all use it as the 'system' role content).
SYSTEM_PROMPT = (
    'You are a change detection assistant. '
    'Be precise and factual. Never speculate. '
    'Always use exact numbers, values, and quoted text when present in the diff. '
    'If nothing meaningful changed, say so explicitly.'
)
|
||||
|
||||
|
||||
def _build_context_header(watch, datastore):
|
||||
"""Return a short multi-line string describing what this watch monitors.
|
||||
|
||||
Included lines (only when non-empty / non-redundant):
|
||||
URL: <url>
|
||||
Monitor: <user title or fetched page title> (omitted when same as URL)
|
||||
Tags: <comma-separated tag titles> (omitted when none)
|
||||
"""
|
||||
url = watch.get('url', '')
|
||||
title = watch.get('title', '') or watch.get('page_title', '')
|
||||
|
||||
lines = [f"URL: {url}"]
|
||||
if title and title != url:
|
||||
lines.append(f"Monitor: {title}")
|
||||
|
||||
tag_titles = []
|
||||
for tag_uuid in (watch.get('tags') or []):
|
||||
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid, {})
|
||||
t = tag.get('title', '').strip()
|
||||
if t:
|
||||
tag_titles.append(t)
|
||||
if tag_titles:
|
||||
lines.append(f"Tags: {', '.join(tag_titles)}")
|
||||
|
||||
return '\n'.join(lines)
|
||||
|
||||
|
||||
def _chunk_lines(lines, model, chunk_token_size):
|
||||
"""Split lines into chunks that each fit within chunk_token_size tokens."""
|
||||
import litellm
|
||||
chunks, current, current_tokens = [], [], 0
|
||||
for line in lines:
|
||||
line_tokens = litellm.token_counter(model=model, text=line)
|
||||
if current and current_tokens + line_tokens > chunk_token_size:
|
||||
chunks.append('\n'.join(current))
|
||||
current, current_tokens = [], 0
|
||||
current.append(line)
|
||||
current_tokens += line_tokens
|
||||
if current:
|
||||
chunks.append('\n'.join(current))
|
||||
return chunks
|
||||
|
||||
|
||||
def _enumerate_changes(diff_text, context_header, model, llm_kwargs):
    """
    First pass: have the model list every distinct change in the diff, one per line.

    The model is told to be exhaustive and not to prioritise, so no compression
    decisions are made at this stage. Returns the model's plain-text list.
    llm_kwargs is forwarded to _call_llm unchanged.
    """
    user_content = (
        f"{context_header}\n"
        f"Diff:\n{diff_text}\n\n"
        "List every distinct change you see, one item per line. "
        "Be exhaustive — do not filter or prioritise. "
        "Use exact values from the diff (prices, dates, counts, quoted text)."
    )
    system_message = {'role': 'system', 'content': SYSTEM_PROMPT}
    user_message = {'role': 'user', 'content': user_content}

    # The enumeration gets a larger output budget than the final summary.
    return _call_llm(model=model, messages=[system_message, user_message], max_tokens=1200, **llm_kwargs)
|
||||
|
||||
|
||||
def _summarise_enumeration(enumerated, context_header, model, llm_kwargs, summary_instruction=None):
    """
    Second pass: turn the exhaustive change list into the final output.

    The input is the already-enumerated list rather than the raw diff.
    When summary_instruction is given (and non-empty) it replaces the default
    instruction built from STRUCTURED_OUTPUT_INSTRUCTION.
    llm_kwargs is forwarded to _call_llm unchanged.
    """
    if summary_instruction:
        instruction = summary_instruction
    else:
        instruction = (
            "Now produce the final structured output for all of these changes.\n\n"
            + STRUCTURED_OUTPUT_INSTRUCTION
        )

    user_content = (
        f"{context_header}\n"
        f"All changes detected:\n{enumerated}\n\n"
        + instruction
    )
    conversation = [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': user_content},
    ]
    return _call_llm(model=model, messages=conversation, max_tokens=500, **llm_kwargs)
|
||||
|
||||
|
||||
def process_llm_summary(item, datastore):
    """
    Generate an LLM summary for a detected change and store it via write_llm_data().

    item keys:
        uuid        - watch UUID
        snapshot_id - the newer snapshot ID; matched against each history entry's
                      file basename up to the first '.'
        attempts    - retry counter (maintained by the worker; not read here)

    Summarisation strategy (chosen by diff token count):
        Small  (< TOKEN_SINGLE_PASS_THRESHOLD): one call on the whole diff.
        Medium (< TOKEN_TWO_PASS_THRESHOLD):    two calls — enumerate all changes, then compress.
        Large  (>= TOKEN_TWO_PASS_THRESHOLD):   map-reduce — chunk → enumerate per chunk →
                                                synthesise the combined enumerations.

    The two-pass / map-reduce approach prevents lossiness: temperature=0 causes the model
    to greedily commit to the most prominent change and drop the rest in a single pass.
    Enumerating first forces comprehensive coverage before any compression happens.

    Returns None. Nothing is written when the two snapshots produce an empty diff.

    Raises:
        ValueError: the watch does not exist, snapshot_id is not in its history,
            or snapshot_id is the first history entry (nothing to diff against).
        _RateLimitWait: propagated from _call_llm when the token bucket is empty.
    """
    import difflib
    import litellm

    uuid = item['uuid']
    snapshot_id = item['snapshot_id']

    watch = datastore.data['watching'].get(uuid)
    if not watch:
        raise ValueError(f"Watch {uuid} not found")

    # Find this snapshot and the one before it in history
    history = watch.history
    history_keys = list(history.keys())

    try:
        idx = next(
            i for i, k in enumerate(history_keys)
            if os.path.basename(history[k]).split('.')[0] == snapshot_id
        )
    except StopIteration:
        raise ValueError(f"snapshot_id {snapshot_id} not found in history for watch {uuid}")

    if idx == 0:
        raise ValueError(f"snapshot_id {snapshot_id} is the first history entry — no prior to diff against")

    before_text = _read_snapshot(watch, history[history_keys[idx - 1]])
    current_text = _read_snapshot(watch, history[history_keys[idx]])

    # Resolve model / credentials via connections table (with legacy flat-field fallback)
    model, api_key, api_base, conn_id, tpm = _resolve_llm_connection(watch, datastore)
    context_header = _build_context_header(watch, datastore)

    # Extra keyword arguments for every _call_llm invocation; optional ones are
    # only included when set.
    llm_kwargs = {
        'log_path': os.path.join(datastore.datastore_path, 'llm-log.txt'),
    }
    if api_key:
        llm_kwargs['api_key'] = api_key
    if api_base:
        llm_kwargs['api_base'] = api_base
    if conn_id:
        llm_kwargs['conn_id'] = conn_id
    if tpm:
        llm_kwargs['tpm'] = tpm

    # Use custom prompt / context-line setting if configured
    from changedetectionio.llm.plugin import get_llm_settings
    llm_settings = get_llm_settings(datastore)
    custom_prompt = (llm_settings.get('llm_summary_prompt') or '').strip()
    summary_instruction = custom_prompt if custom_prompt else (
        "Analyse all changes in this diff.\n\n" + STRUCTURED_OUTPUT_INSTRUCTION
    )
    # 0 context lines is a valid setting, so only a missing / blank value falls
    # back to the default of 2 (a plain `or 2` would override an explicit 0).
    raw_context = llm_settings.get('llm_diff_context_lines')
    context_n = 2 if raw_context in (None, '') else int(raw_context)

    diff_lines = list(difflib.unified_diff(
        before_text.splitlines(),
        current_text.splitlines(),
        lineterm='',
        n=context_n,
    ))
    diff_text = '\n'.join(diff_lines)

    if not diff_text.strip():
        logger.debug(f"LLM: no diff content for {uuid}/{snapshot_id}, skipping")
        return

    diff_tokens = litellm.token_counter(model=model, text=diff_text)
    logger.debug(f"LLM: diff is {diff_tokens} tokens for {uuid}/{snapshot_id}")

    if diff_tokens < TOKEN_SINGLE_PASS_THRESHOLD:
        # Small diff — single call, model can see everything at once
        messages = [
            {'role': 'system', 'content': SYSTEM_PROMPT},
            {
                'role': 'user',
                'content': (
                    f"{context_header}\n"
                    f"Diff:\n{diff_text}\n\n"
                    + summary_instruction
                ),
            },
        ]
        raw = _call_llm(model=model, messages=messages, max_tokens=500, **llm_kwargs)
        strategy = 'single'

    elif diff_tokens < TOKEN_TWO_PASS_THRESHOLD:
        # Medium diff — two-pass: enumerate exhaustively, then compress
        enumerated = _enumerate_changes(diff_text, context_header, model, llm_kwargs)
        raw = _summarise_enumeration(enumerated, context_header, model, llm_kwargs, summary_instruction)
        strategy = 'two-pass'

    else:
        # Large diff — map-reduce: chunk → enumerate per chunk → synthesise
        chunks = _chunk_lines(diff_lines, model, TOKEN_CHUNK_SIZE)
        logger.debug(f"LLM: map-reduce over {len(chunks)} chunks for {uuid}/{snapshot_id}")

        chunk_enumerations = []
        for i, chunk in enumerate(chunks):
            logger.debug(f"LLM: enumerating chunk {i+1}/{len(chunks)}")
            chunk_enumerations.append(
                _enumerate_changes(chunk, context_header, model, llm_kwargs)
            )

        combined = '\n'.join(chunk_enumerations)
        raw = _summarise_enumeration(combined, context_header, model, llm_kwargs, summary_instruction)
        strategy = 'map-reduce'

    llm_data = parse_llm_response(raw)
    write_llm_data(watch.data_dir, snapshot_id, llm_data)
    logger.info(f"LLM tokens written for {uuid}/{snapshot_id} (strategy: {strategy}, tokens: {diff_tokens})")
|
||||
|
||||
|
||||
def llm_summary_runner(worker_id, app, datastore, llm_q):
    """
    Blocking worker loop that drains the LLM summary queue until app.config.exit is set.

    Per item:
      * not yet due ('next_retry_at' in the future) → put back, wait up to 5s;
      * success → done;
      * _RateLimitWait or litellm RateLimitError → re-queued with a delay, attempt
        counter untouched;
      * any other exception → re-queued with exponential backoff
        (RETRY_BACKOFF_BASE_SECONDS * 2**attempts) while attempts < MAX_RETRIES,
        otherwise dropped and recorded as 'last_error' on the watch (if it still exists).
    """
    with app.app_context():
        while not app.config.exit.is_set():
            try:
                item = llm_q.get(block=False)
            except queue.Empty:
                # Idle: wake up once a second (or immediately on exit).
                app.config.exit.wait(1)
                continue

            # Respect the retry delay — return the item to the queue and nap
            # briefly instead of busy-looping.
            due_at = item.get('next_retry_at', 0)
            if due_at > time.time():
                llm_q.put(item)
                app.config.exit.wait(min(due_at - time.time(), 5))
                continue

            uuid = item.get('uuid')
            snapshot_id = item.get('snapshot_id')
            attempts = item.get('attempts', 0)

            logger.debug(f"LLM worker {worker_id} processing uuid={uuid} snapshot={snapshot_id} attempt={attempts}")

            try:
                process_llm_summary(item, datastore)
                logger.info(f"LLM worker {worker_id} completed summary for uuid={uuid} snapshot={snapshot_id}")

            except NotImplementedError:
                # Dropped without retry.
                logger.debug(f"LLM worker {worker_id} skipping — processor not yet implemented")

            except _RateLimitWait as rw:
                # Proactive limiter says "not yet": delay without counting a failure.
                item['next_retry_at'] = time.time() + rw.wait_seconds
                llm_q.put(item)
                logger.info(
                    f"LLM worker {worker_id} rate-limited (proactive) for {rw.wait_seconds:.1f}s "
                    f"uuid={uuid}"
                )

            except Exception as e:
                # Reactive path: did the provider itself report a rate limit?
                try:
                    import litellm as _litellm
                    rate_limit_error = _litellm.RateLimitError
                except ImportError:
                    rate_limit_error = None

                if rate_limit_error is not None and isinstance(e, rate_limit_error):
                    wait = _parse_retry_after(e)
                    item['next_retry_at'] = time.time() + wait
                    llm_q.put(item)
                    logger.warning(
                        f"LLM worker {worker_id} API rate limit for uuid={uuid}, "
                        f"retry in {wait:.1f}s"
                    )
                    continue

                logger.error(f"LLM worker {worker_id} error for uuid={uuid} snapshot={snapshot_id}: {e}")

                if attempts >= MAX_RETRIES:
                    # Out of retries: drop the job and surface the failure on the watch.
                    logger.error(
                        f"LLM worker {worker_id} gave up on uuid={uuid} snapshot={snapshot_id} "
                        f"after {MAX_RETRIES} attempts"
                    )
                    if uuid and uuid in datastore.data['watching']:
                        datastore.update_watch(
                            uuid=uuid,
                            update_obj={'last_error': f"LLM summary failed after {MAX_RETRIES} attempts: {e}"}
                        )
                    continue

                backoff = RETRY_BACKOFF_BASE_SECONDS * (2 ** attempts)
                item['attempts'] = attempts + 1
                item['next_retry_at'] = time.time() + backoff
                llm_q.put(item)
                logger.info(
                    f"LLM worker {worker_id} re-queued uuid={uuid} "
                    f"attempt={item['attempts']}/{MAX_RETRIES} retry_in={backoff}s"
                )
|
||||
@@ -1,139 +0,0 @@
|
||||
import re
|
||||
import uuid as _uuid
|
||||
|
||||
from flask_babel import lazy_gettext as _l
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
FieldList,
|
||||
Form,
|
||||
FormField,
|
||||
HiddenField,
|
||||
IntegerField,
|
||||
PasswordField,
|
||||
SelectField,
|
||||
StringField,
|
||||
TextAreaField,
|
||||
)
|
||||
from wtforms.validators import Length, NumberRange, Optional
|
||||
|
||||
from changedetectionio.llm.tokens import STRUCTURED_OUTPUT_INSTRUCTION
|
||||
|
||||
# The built-in instruction appended after the diff. It is used as the placeholder
# text of the "Summary prompt" textarea so users can see the default.
DEFAULT_SUMMARY_PROMPT = (
    "Analyse all changes in this diff.\n\n"
    + STRUCTURED_OUTPUT_INSTRUCTION
)
|
||||
|
||||
# A browser-supplied connection ID is accepted only if it is 1-64 characters
# drawn from letters, digits, underscore and hyphen.
_CONN_ID_RE = re.compile(r'^[a-zA-Z0-9_-]{1,64}$')


def sanitised_conn_id(raw):
    """Return the stripped ID when it is a safe identifier, else a freshly generated UUID4 string."""
    candidate = (raw or '').strip()
    if _CONN_ID_RE.match(candidate):
        return candidate
    return str(_uuid.uuid4())
|
||||
|
||||
|
||||
class LLMConnectionEntryForm(Form):
    """One stored LLM connection.

    The explicit field list doubles as an allow-list: only the keys declared
    here can travel from this form into the datastore, which blocks arbitrary
    key injection.
    """

    connection_id = HiddenField()
    name = StringField(
        label=_l('Name'),
        validators=[Optional(), Length(max=100)],
    )
    model = StringField(
        label=_l('Model string'),
        validators=[Optional(), Length(max=200)],
    )
    api_key = StringField(
        label=_l('API Key'),
        validators=[Optional(), Length(max=500)],
    )
    api_base = StringField(
        label=_l('API Endpoint'),
        validators=[Optional(), Length(max=500)],
    )
    tokens_per_minute = IntegerField(
        label=_l('Tokens/min'),
        validators=[Optional(), NumberRange(min=0, max=10_000_000)],
        default=0,
    )
    is_default = BooleanField(
        label=_l('Default'),
        validators=[Optional()],
    )
|
||||
|
||||
|
||||
class LLMNewConnectionForm(Form):
    """Input fields behind the 'Add a connection' panel.

    llm.js reads these in the browser to append a new FieldList entry when the
    user clicks; the server never consumes them. The ids set through render_kw
    are the ones llm.js looks up (e.g. $('#llm-add-name')).
    """

    preset = SelectField(
        label=_l('Provider template'),
        validate_choice=False,
        # WTForms 3.x renders optgroups from a dict (has_groups() tests isinstance(choices, dict)).
        # The empty-string key becomes <optgroup label="">, which browsers show as ungrouped.
        choices={
            '': [('', '')],
            _l('Cloud'): [
                ('openai-mini', 'OpenAI — gpt-4o-mini'),
                ('openai-4o', 'OpenAI — gpt-4o'),
                ('anthropic-haiku', 'Anthropic — claude-3-haiku'),
                ('anthropic-sonnet', 'Anthropic — claude-3-5-sonnet'),
                ('groq-8b', 'Groq — llama-3.1-8b-instant'),
                ('groq-70b', 'Groq — llama-3.3-70b-versatile'),
                ('gemini-flash', 'Google — gemini-1.5-flash'),
                ('mistral-small', 'Mistral — mistral-small'),
                ('deepseek', 'DeepSeek — deepseek-chat'),
                ('openrouter', 'OpenRouter (custom model)'),
            ],
            _l('Local'): [
                ('ollama-llama', 'Ollama — llama3.1'),
                ('ollama-mistral', 'Ollama — mistral'),
                ('lmstudio', 'LM Studio'),
            ],
            _l('Custom'): [
                ('custom', _l('Manual entry')),
            ],
        },
        render_kw={'id': 'llm-preset'},
    )
    name = StringField(
        label=_l('Name'),
        render_kw={'id': 'llm-add-name', 'size': 30, 'autocomplete': 'off'},
    )
    model = StringField(
        label=_l('Model string'),
        render_kw={
            'id': 'llm-add-model',
            'size': 40,
            'placeholder': 'gpt-4o-mini',
            'autocomplete': 'off',
        },
    )
    api_key = PasswordField(
        label=_l('API Key'),
        render_kw={
            'id': 'llm-add-key',
            'size': 40,
            'placeholder': 'sk-…',
            'autocomplete': 'off',
        },
    )
    api_base = StringField(
        label=_l('API Endpoint'),
        render_kw={
            'id': 'llm-add-base',
            'size': 40,
            'placeholder': 'http://localhost:11434',
            'autocomplete': 'off',
        },
    )
    tokens_per_minute = IntegerField(
        label=_l('Tokens/min'),
        default=0,
        render_kw={
            'id': 'llm-add-tpm',
            'style': 'width: 8em;',
            'min': '0',
            'step': '1000',
        },
    )
|
||||
|
||||
|
||||
class LLMSettingsForm(Form):
    """Form backing the LLM settings tab.

    llm_connection holds a list of LLMConnectionEntryForm sub-forms. On submit,
    llm.js sends one hidden input per value (llm_connection-N-fieldname) rather
    than a JSON blob, so every value passes through the declared schema.
    new_connection only carries the browser-side "add" staging inputs.
    """

    llm_connection = FieldList(FormField(LLMConnectionEntryForm), min_entries=0)
    new_connection = FormField(LLMNewConnectionForm)

    llm_diff_context_lines = IntegerField(
        label=_l('Diff context lines'),
        validators=[Optional(), NumberRange(min=0, max=20)],
        default=2,
        description=_l(
            'Number of unchanged lines shown around each change in the diff. '
            'More lines give the LLM more context but increase token usage. (default: 2)'
        ),
        render_kw={'style': 'width: 5em;', 'min': '0', 'max': '20'},
    )

    llm_summary_prompt = TextAreaField(
        label=_l('Summary prompt'),
        validators=[Optional()],
        description=_l(
            'Override the instruction sent to the LLM after the diff. '
            'Leave blank to use the built-in default (structured JSON output).'
        ),
        render_kw={'rows': 8, 'placeholder': DEFAULT_SUMMARY_PROMPT, 'class': 'pure-input-1'},
    )
|
||||
@@ -1,103 +0,0 @@
|
||||
<script src="{{url_for('static_content', group='js', filename='llm.js')}}" defer></script>
|
||||
<script>
    {# Every server-side value is emitted with |tojson so it becomes a correctly escaped
       JS literal. A translation containing a quote or backslash can then no longer
       break, or be mangled inside, a hand-written '...' string. #}
    var LLM_CONNECTIONS = (function () {
        var list = {{ plugin_form.llm_connection.data|tojson }};
        var out = {};
        (list || []).forEach(function (c) { if (c && c.connection_id) out[c.connection_id] = c; });
        return out;
    }());
    var LLM_I18N = {
        noConnections: {{ _("No connections configured yet.")|tojson }},
        setDefault: {{ _("Set as default")|tojson }},
        remove: {{ _("Remove")|tojson }},
        show: {{ _("show")|tojson }},
        hide: {{ _("hide")|tojson }},
        nameModelRequired: {{ _("Name and Model string are required.")|tojson }}
    };
</script>
|
||||
|
||||
{# ── Configured connections table ──────────────────── #}
|
||||
<fieldset>
|
||||
<legend>{{ _('LLM Connections') }}</legend>
|
||||
|
||||
<table class="pure-table pure-table-horizontal llm-connections">
|
||||
<thead>
|
||||
<tr>
|
||||
<th class="llm-col-def" title="{{ _('Default') }}">{{ _('Default') }}</th>
|
||||
<th class="llm-col-name">{{ _('Name') }}</th>
|
||||
<th class="llm-col-model">{{ _('Model') }}</th>
|
||||
<th class="llm-col-key">{{ _('API Key') }}</th>
|
||||
<th class="llm-col-tpm" title="{{ _('Tokens per minute limit (0 = unlimited)') }}">{{ _('TPM') }}</th>
|
||||
<th class="llm-col-del"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="llm-connections-tbody">
|
||||
</tbody>
|
||||
</table>
|
||||
</fieldset>
|
||||
|
||||
{# ── Add connection ─────────────────────────────────── #}
|
||||
{% set nf = plugin_form.new_connection.form %}
|
||||
<fieldset>
|
||||
<legend>{{ _('Add a connection') }}</legend>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ nf.preset.label }}
|
||||
{{ nf.preset() }}
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ nf.name.label }}
|
||||
{{ nf.name(placeholder=_('e.g. My OpenAI')) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ nf.model.label }}
|
||||
{{ nf.model() }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<label for="llm-add-key">
|
||||
{{ _('API Key') }}
|
||||
<span class="pure-form-message-inline">({{ _('leave blank for local') }})</span>
|
||||
</label>
|
||||
<div class="llm-key-wrap">
|
||||
{{ nf.api_key() }}
|
||||
<button type="button" id="llm-key-toggle" class="pure-button">{{ _('show') }}</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group" id="llm-base-group" style="display:none">
|
||||
<label for="llm-add-base">
|
||||
{{ _('API Endpoint') }}
|
||||
<span class="pure-form-message-inline">({{ _('optional') }})</span>
|
||||
</label>
|
||||
{{ nf.api_base() }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<label for="llm-add-tpm">
|
||||
{{ _('Tokens/min limit') }}
|
||||
<span class="pure-form-message-inline">({{ _('0 = unlimited') }})</span>
|
||||
</label>
|
||||
{{ nf.tokens_per_minute() }}
|
||||
</div>
|
||||
|
||||
<div class="pure-controls">
|
||||
<button type="button" id="llm-btn-add" class="pure-button pure-button-primary">{{ _('+ Add connection') }}</button>
|
||||
</div>
|
||||
</fieldset>
|
||||
|
||||
{# ── Prompt configuration ────────────────────────────────── #}
|
||||
<fieldset>
|
||||
<legend>{{ _('Summary Prompt') }}</legend>
|
||||
<div class="pure-control-group">
|
||||
{{ plugin_form.llm_diff_context_lines.label }}
|
||||
{{ plugin_form.llm_diff_context_lines() }}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Unchanged lines shown around each change in the diff sent to the LLM. More lines = more context but higher token cost. (default: 2)') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(plugin_form.llm_summary_prompt) }}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Instruction appended after the diff in every LLM call. Leave blank to use the built-in default (structured JSON output).') }}
|
||||
</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
@@ -1,197 +0,0 @@
|
||||
"""
|
||||
LLM notification token definitions and file I/O helpers.
|
||||
|
||||
All LLM data for a snapshot is stored under a dedicated subdirectory:
|
||||
{data_dir}/llm/{snapshot_id}-llm.json
|
||||
|
||||
A plain-text {snapshot_id}-llm.txt is also written containing just the
|
||||
summary field, for backward compatibility with any code that already reads it.
|
||||
|
||||
Token catalogue
|
||||
---------------
|
||||
llm_summary 1-3 sentence description of all changes, exact values.
|
||||
llm_headline 5-8 word punchy title — ideal for the notification subject line.
|
||||
llm_importance Numeric 1-10 significance score; enables routing rules like
|
||||
"only escalate if llm_importance >= 8".
|
||||
llm_sentiment Machine-readable: "positive", "negative", or "neutral".
|
||||
Useful for trend tracking and coloured alert styling.
|
||||
llm_one_liner Shortest useful summary — one sentence for SMS, Pushover,
|
||||
and other character-limited channels.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from loguru import logger
|
||||
|
||||
# ── Constants ──────────────────────────────────────────────────────────────
|
||||
|
||||
LLM_TOKEN_NAMES = (
|
||||
'llm_summary',
|
||||
'llm_headline',
|
||||
'llm_importance',
|
||||
'llm_sentiment',
|
||||
'llm_one_liner',
|
||||
)
|
||||
|
||||
# How long the notification runner waits for LLM data before giving up.
|
||||
LLM_NOTIFICATION_RETRY_DELAY_SECONDS = int(os.getenv('LLM_NOTIFICATION_RETRY_DELAY', '10'))
|
||||
LLM_NOTIFICATION_MAX_WAIT_ATTEMPTS = int(os.getenv('LLM_NOTIFICATION_MAX_WAIT', '18')) # 18 × 10s = 3 min
|
||||
|
||||
# JSON prompt fragment — embedded in the final summarisation call.
|
||||
STRUCTURED_OUTPUT_INSTRUCTION = (
|
||||
'Return ONLY a valid JSON object — no markdown fences, no extra text — using exactly these keys:\n'
|
||||
'{"summary":"1-3 sentences covering ALL changes; use exact values from the diff.","headline":"5-8 word punchy title for this specific change","importance":7,"sentiment":"positive","one_liner":"One sentence for SMS/push character limits."}\n'
|
||||
'importance: 1=trivial whitespace, 5=moderate content change, 10=critical price/availability change.\n'
|
||||
'sentiment: "positive" (desirable for the user), "negative" (undesirable), or "neutral" (informational only).'
|
||||
)
|
||||
|
||||
|
||||
# ── File I/O ───────────────────────────────────────────────────────────────
|
||||
|
||||
def llm_subdir(data_dir: str) -> str:
    """Build the path of the ``llm`` directory under *data_dir*.

    Only the path is computed; the directory is not created here.
    """
    subdir_name = 'llm'
    return os.path.join(data_dir, subdir_name)
|
||||
|
||||
|
||||
def llm_json_path(data_dir: str, snapshot_id: str) -> str:
    """Return the path of the structured ``{snapshot_id}-llm.json`` file inside the llm/ subdirectory."""
    filename = f"{snapshot_id}-llm.json"
    return os.path.join(llm_subdir(data_dir), filename)
|
||||
|
||||
|
||||
def llm_txt_path(data_dir: str, snapshot_id: str) -> str:
    """Return the path of the plain-text ``{snapshot_id}-llm.txt`` file inside the llm/ subdirectory."""
    filename = f"{snapshot_id}-llm.txt"
    return os.path.join(llm_subdir(data_dir), filename)
|
||||
|
||||
|
||||
def is_llm_data_ready(data_dir: str, snapshot_id: str) -> bool:
    """Tell whether LLM output exists on disk for this snapshot.

    True as soon as either the JSON file or the plain-text file is present;
    the JSON path is checked first.
    """
    path_builders = (llm_json_path, llm_txt_path)
    return any(os.path.exists(build(data_dir, snapshot_id)) for build in path_builders)
|
||||
|
||||
|
||||
def read_llm_tokens(data_dir: str, snapshot_id: str) -> dict:
    """
    Load the LLM tokens stored for a snapshot.

    The JSON file is preferred; when it is missing, unreadable, or does not
    contain a JSON object, the plain-text summary file is used instead.
    Read failures are logged as warnings, never raised.

    Returns:
        dict: normalised token dict, or an empty dict when nothing usable exists.
    """
    json_path = llm_json_path(data_dir, snapshot_id)
    if os.path.exists(json_path):
        try:
            with open(json_path, 'r', encoding='utf-8') as fh:
                parsed = json.load(fh)
            # Anything other than a JSON object falls through to the .txt fallback.
            if isinstance(parsed, dict):
                return _normalise(parsed)
        except Exception as exc:
            logger.warning(f"LLM tokens: failed to read {json_path}: {exc}")

    txt_path = llm_txt_path(data_dir, snapshot_id)
    if not os.path.exists(txt_path):
        return {}

    try:
        with open(txt_path, 'r', encoding='utf-8') as fh:
            summary_text = fh.read().strip()
        # Old format only has a summary; reuse its first 200 chars as the one-liner.
        return _normalise({'summary': summary_text, 'one_liner': summary_text[:200]})
    except Exception as exc:
        logger.warning(f"LLM tokens: failed to read {txt_path}: {exc}")

    return {}
|
||||
|
||||
|
||||
def write_llm_data(data_dir: str, snapshot_id: str, data: dict) -> str:
    """
    Persist LLM data for a snapshot under the llm/ subdirectory.

    The data is normalised first, the subdirectory is created if needed, and
    two files are written through _atomic_write_text, JSON first:
        llm/{snapshot_id}-llm.json — the full normalised token dict
        llm/{snapshot_id}-llm.txt  — only the summary text (backward compat)

    Returns:
        str: path of the JSON file.
    """
    tokens = _normalise(data)

    os.makedirs(llm_subdir(data_dir), exist_ok=True)

    json_file = llm_json_path(data_dir, snapshot_id)
    outputs = (
        (json_file, json.dumps(tokens, ensure_ascii=False)),
        (llm_txt_path(data_dir, snapshot_id), tokens.get('summary', '')),
    )
    for target, body in outputs:
        _atomic_write_text(target, body)

    return json_file
|
||||
|
||||
|
||||
def parse_llm_response(response: str) -> dict:
    """
    Parse a structured JSON response from the LLM into a normalised token dict.

    Strategies are tried in order, and the first one yielding a JSON object wins:
      1. strict JSON parse of the whole (stripped) response,
      2. a JSON object wrapped in a markdown code fence,
      3. the first flat ``{...}`` object found anywhere in the text,
      4. fallback: the raw text becomes 'summary' and its first 200
         characters become 'one_liner'.

    Args:
        response: raw text returned by the LLM. ``None`` or a non-string value
            is treated as an empty response instead of raising.

    Returns:
        dict: the result of _normalise() for whichever strategy succeeded.
    """
    import re

    # A missing/non-text response is handled like an empty one so callers
    # always get a token dict back.
    text = response.strip() if isinstance(response, str) else ''

    def _as_tokens(candidate):
        """Return normalised tokens if *candidate* is a JSON object, else None."""
        try:
            obj = json.loads(candidate)
        except (json.JSONDecodeError, ValueError):
            return None
        return _normalise(obj) if isinstance(obj, dict) else None

    # 1. Direct JSON parse
    tokens = _as_tokens(text)
    if tokens is not None:
        return tokens

    # 2. Markdown code fence: ```json { ... } ```
    # 3. Bare JSON object anywhere in the response (flat objects only)
    for pattern, group in ((r'```(?:json)?\s*(\{.*?\})\s*```', 1), (r'\{[^{}]*\}', 0)):
        m = re.search(pattern, text, re.DOTALL)
        if m:
            tokens = _as_tokens(m.group(group))
            if tokens is not None:
                return tokens

    # 4. Fallback — treat entire response as summary
    logger.debug("LLM response was not valid JSON — using raw text as summary")
    return _normalise({'summary': text, 'one_liner': text[:200]})
|
||||
|
||||
|
||||
# ── Internal helpers ───────────────────────────────────────────────────────
|
||||
|
||||
def _normalise(data: dict) -> dict:
|
||||
"""Return a clean token dict with all expected keys present."""
|
||||
importance = data.get('importance')
|
||||
if importance is not None:
|
||||
try:
|
||||
importance = max(1, min(10, int(float(importance))))
|
||||
except (TypeError, ValueError):
|
||||
importance = None
|
||||
|
||||
sentiment = str(data.get('sentiment', '')).lower().strip()
|
||||
if sentiment not in ('positive', 'negative', 'neutral'):
|
||||
sentiment = ''
|
||||
|
||||
return {
|
||||
'summary': str(data.get('summary', '') or '').strip(),
|
||||
'headline': str(data.get('headline', '') or '').strip(),
|
||||
'importance': importance,
|
||||
'sentiment': sentiment,
|
||||
'one_liner': str(data.get('one_liner', '') or '').strip(),
|
||||
}
|
||||
|
||||
|
||||
def _atomic_write_text(path: str, text: str) -> None:
|
||||
tmp = path + '.tmp'
|
||||
with open(tmp, 'w', encoding='utf-8') as f:
|
||||
f.write(text)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp, path)
|
||||
@@ -43,6 +43,11 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||
_FAVICON_FILENAME_CACHE: dict = {}
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
@@ -806,9 +811,8 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
# Invalidate module-level favicon filename cache for this watch
|
||||
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
@@ -823,35 +827,23 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_favicon_filename(self) -> str | None:
|
||||
"""
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
Find any favicon.* file in the watch data directory.
|
||||
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||
deepcopy (which drops instance attrs), and concurrent request races.
|
||||
Invalidated by bump_favicon() when a new favicon is saved.
|
||||
|
||||
Returns:
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
str: Basename of the favicon file, or None if not found.
|
||||
"""
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||
|
||||
if not files:
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
fname = os.path.basename(files[0]) if files else None
|
||||
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||
return fname
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -1009,31 +1001,14 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
|
||||
def extra_notification_token_values(self):
|
||||
from changedetectionio.llm.tokens import read_llm_tokens
|
||||
history = self.history
|
||||
if not history:
|
||||
return {}
|
||||
latest_fname = history[list(history.keys())[-1]]
|
||||
snapshot_id = os.path.basename(latest_fname).split('.')[0] # always 32-char MD5
|
||||
data = read_llm_tokens(self.data_dir, snapshot_id)
|
||||
if not data:
|
||||
return {}
|
||||
return {
|
||||
'llm_summary': data.get('summary', ''),
|
||||
'llm_headline': data.get('headline', ''),
|
||||
'llm_importance': data.get('importance'),
|
||||
'llm_sentiment': data.get('sentiment', ''),
|
||||
'llm_one_liner': data.get('one_liner', ''),
|
||||
}
|
||||
# Used for providing extra tokens
|
||||
# return {'widget': 555}
|
||||
return {}
|
||||
|
||||
def extra_notification_token_placeholder_info(self):
|
||||
return [
|
||||
('llm_summary', "LLM: 1-3 sentence summary of all changes with exact values"),
|
||||
('llm_headline', "LLM: 5-8 word punchy title for this specific change"),
|
||||
('llm_importance', "LLM: Significance score 1-10 (1=trivial, 10=critical)"),
|
||||
('llm_sentiment', "LLM: Change sentiment — positive, negative, or neutral"),
|
||||
('llm_one_liner', "LLM: One sentence for SMS/push character limits"),
|
||||
]
|
||||
# Used for providing extra tokens
|
||||
# return [('widget', "Get widget amounts")]
|
||||
return []
|
||||
|
||||
|
||||
def extract_regex_from_all_history(self, regex):
|
||||
@@ -1199,18 +1174,13 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def compile_error_texts(self, has_proxies=None):
|
||||
"""Compile error texts for this watch.
|
||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||
from flask import url_for
|
||||
from flask import url_for, has_request_context
|
||||
from markupsafe import Markup
|
||||
|
||||
output = [] # Initialize as list since we're using append
|
||||
last_error = self.get('last_error','')
|
||||
|
||||
try:
|
||||
url_for('settings.settings_page')
|
||||
except Exception as e:
|
||||
has_app_context = False
|
||||
else:
|
||||
has_app_context = True
|
||||
has_app_context = has_request_context()
|
||||
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
|
||||
@@ -6,7 +6,7 @@ Extracted from update_worker.py to provide standalone notification functionality
|
||||
for both sync and async workers
|
||||
"""
|
||||
import datetime
|
||||
import os
|
||||
from copy import deepcopy
|
||||
|
||||
import pytz
|
||||
from loguru import logger
|
||||
@@ -63,6 +63,7 @@ class FormattableTimestamp(str):
|
||||
|
||||
{{ change_datetime }} → '2024-01-15 10:30:00 UTC'
|
||||
{{ change_datetime(format='%Y') }} → '2024'
|
||||
{{ change_datetime(format='%A') }} → 'Monday'
|
||||
{{ change_datetime(format='%Y-%m-%d') }} → '2024-01-15'
|
||||
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
@@ -88,6 +89,62 @@ class FormattableTimestamp(str):
|
||||
return self._dt.isoformat()
|
||||
|
||||
|
||||
class FormattableDiff(str):
    """
    A str subclass representing a rendered diff. As a plain string it renders
    with the default options for that variant, but can be called with custom
    arguments in Jinja2 templates:

        {{ diff }}                               → default diff output
        {{ diff(lines=5) }}                      → truncate to 5 lines
        {{ diff(added_only=true) }}              → only show added lines
        {{ diff(removed_only=true) }}            → only show removed lines
        {{ diff(context=3) }}                    → 3 lines of context around changes
        {{ diff(word_diff=false) }}              → line-level diff instead of word-level
        {{ diff(lines=10, added_only=true) }}    → combine args
        {{ diff_added(lines=5) }}                → works on any diff_* variant too

    Being a str subclass means it is natively JSON serializable.

    The two snapshots and the variant's base render options are kept on the
    instance (_prev, _current, _base_kwargs) so that calling the object can
    re-render with overrides, and so copy/deepcopy/pickle can rebuild it.
    """
    def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
        # Nothing to diff when both snapshots are empty: skip rendering entirely.
        if prev_snapshot or current_snapshot:
            from changedetectionio import diff as diff_module
            rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
        else:
            rendered = ''
        instance = super().__new__(cls, rendered)
        instance._prev = prev_snapshot
        instance._current = current_snapshot
        instance._base_kwargs = base_kwargs
        return instance

    def __getnewargs_ex__(self):
        # str's default reconstruction passes only the rendered text to __new__,
        # which does not match this class's (prev, current, **kwargs) signature
        # and raises TypeError on copy.copy()/copy.deepcopy()/pickle.
        # Supply the real constructor arguments instead.
        return (self._prev, self._current), dict(self._base_kwargs)

    def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
                 word_diff=None, case_insensitive=False, ignore_junk=False):
        """Re-render the diff with per-call overrides on top of the base options.

        Args:
            lines: if not None, keep only the first int(lines) lines of the output.
            added_only: render with include_removed=False.
            removed_only: render with include_added=False.
            context: when truthy, passed as context_lines=int(context).
            word_diff: when not None, passed as word_diff=bool(word_diff).
            case_insensitive: when truthy, passed as case_insensitive=True.
            ignore_junk: when truthy, passed as ignore_junk=True.

        Returns:
            str: the re-rendered (and possibly truncated) diff text.
        """
        from changedetectionio import diff as diff_module
        # Copy so per-call overrides never leak into the stored defaults.
        kwargs = dict(self._base_kwargs)

        if added_only:
            kwargs['include_removed'] = False
        if removed_only:
            kwargs['include_added'] = False
        if context:
            kwargs['context_lines'] = int(context)
        if word_diff is not None:
            kwargs['word_diff'] = bool(word_diff)
        if case_insensitive:
            kwargs['case_insensitive'] = True
        if ignore_junk:
            kwargs['ignore_junk'] = True

        result = diff_module.render_diff(self._prev or '', self._current or '', **kwargs)

        if lines is not None:
            result = '\n'.join(result.splitlines()[:int(lines)])

        return result
|
||||
|
||||
|
||||
|
||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||
class NotificationContextData(dict):
|
||||
def __init__(self, initial_data=None, **kwargs):
|
||||
@@ -96,15 +153,15 @@ class NotificationContextData(dict):
|
||||
'base_url': None,
|
||||
'change_datetime': FormattableTimestamp(time.time()),
|
||||
'current_snapshot': None,
|
||||
'diff': None,
|
||||
'diff_added': None,
|
||||
'diff_added_clean': None,
|
||||
'diff_clean': None,
|
||||
'diff_full': None,
|
||||
'diff_full_clean': None,
|
||||
'diff_patch': None,
|
||||
'diff_removed': None,
|
||||
'diff_removed_clean': None,
|
||||
'diff': FormattableDiff('', ''),
|
||||
'diff_clean': FormattableDiff('', '', include_change_type_prefix=False),
|
||||
'diff_added': FormattableDiff('', '', include_removed=False),
|
||||
'diff_added_clean': FormattableDiff('', '', include_removed=False, include_change_type_prefix=False),
|
||||
'diff_full': FormattableDiff('', '', include_equal=True),
|
||||
'diff_full_clean': FormattableDiff('', '', include_equal=True, include_change_type_prefix=False),
|
||||
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
@@ -119,12 +176,6 @@ class NotificationContextData(dict):
|
||||
'watch_tag': None,
|
||||
'watch_title': None,
|
||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||
# LLM-generated tokens (populated by notification_runner once LLM data is ready)
|
||||
'llm_headline': None,
|
||||
'llm_importance': None,
|
||||
'llm_one_liner': None,
|
||||
'llm_sentiment': None,
|
||||
'llm_summary': None,
|
||||
'watch_uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
})
|
||||
|
||||
@@ -147,7 +198,7 @@ class NotificationContextData(dict):
|
||||
So we can test the output in the notification body
|
||||
"""
|
||||
for key in self.keys():
|
||||
if key in ['uuid', 'time', 'watch_uuid', 'change_datetime']:
|
||||
if key in ['uuid', 'time', 'watch_uuid', 'change_datetime'] or key.startswith('diff'):
|
||||
continue
|
||||
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
||||
self[key] = rand_str
|
||||
@@ -176,13 +227,12 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
Returns:
|
||||
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
||||
"""
|
||||
from changedetectionio import diff
|
||||
import re
|
||||
from functools import lru_cache
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Define specifications for each diff variant
|
||||
# Define base kwargs for each diff variant — these become the stored defaults
|
||||
# on the FormattableDiff object, so {{ diff(lines=5) }} overrides on top of them
|
||||
diff_specs = {
|
||||
'diff': {'word_diff': word_diff},
|
||||
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
|
||||
@@ -195,22 +245,15 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
# Memoize render_diff to avoid duplicate renders with same kwargs
|
||||
@lru_cache(maxsize=4)
|
||||
def cached_render(kwargs_tuple):
|
||||
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only check and render diff keys that exist in NotificationContextData
|
||||
# Only create FormattableDiff objects for diff keys actually used in the notification text
|
||||
for key in NotificationContextData().keys():
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
kwargs = diff_specs[key]
|
||||
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
|
||||
ret[key] = cached_render(tuple(sorted(kwargs.items())))
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
@@ -300,11 +343,6 @@ class NotificationService:
|
||||
timestamp_changed=dates[date_index_to]))
|
||||
|
||||
if self.notification_q:
|
||||
# Store snapshot_id hint so notification_runner can gate on LLM data readiness
|
||||
if watch and len(dates) > 0:
|
||||
latest_fname = watch.history.get(dates[date_index_to], '')
|
||||
if latest_fname:
|
||||
n_object['_llm_snapshot_id'] = os.path.basename(latest_fname).split('.')[0]
|
||||
logger.debug("Queued notification for sending")
|
||||
self.notification_q.put(n_object)
|
||||
else:
|
||||
@@ -315,7 +353,7 @@ class NotificationService:
|
||||
"""
|
||||
Send notification when content changes are detected
|
||||
"""
|
||||
n_object = NotificationContextData()
|
||||
|
||||
watch = self.datastore.data['watching'].get(watch_uuid)
|
||||
if not watch:
|
||||
return
|
||||
@@ -332,21 +370,51 @@ class NotificationService:
|
||||
# Should be a better parent getter in the model object
|
||||
|
||||
# Prefer - Individual watch settings > Tag settings > Global settings (in that order)
|
||||
# this change probably not needed?
|
||||
n_object['notification_urls'] = _check_cascading_vars(self.datastore, 'notification_urls', watch)
|
||||
# If the watch has no notification_body for example, it will try to get from the first matching group or system setting
|
||||
|
||||
# Should be, if none in the watch, and no group tag ones found, then use system ones at the end
|
||||
#n_object['notification_urls'] = _check_cascading_vars(self.datastore, 'notification_urls', watch)
|
||||
n_object = NotificationContextData()
|
||||
n_object['notification_title'] = _check_cascading_vars(self.datastore,'notification_title', watch)
|
||||
n_object['notification_body'] = _check_cascading_vars(self.datastore,'notification_body', watch)
|
||||
n_object['notification_format'] = _check_cascading_vars(self.datastore,'notification_format', watch)
|
||||
|
||||
notification_objects = []
|
||||
if n_object.get('notification_urls'):
|
||||
notification_objects.append(n_object)
|
||||
|
||||
|
||||
# LOGIC SHOULD BE something that all tests currently pass too
|
||||
# !!! _check_cascading_vars is not really used much, only used here..
|
||||
#
|
||||
|
||||
|
||||
# If any related group/tag has a notification_url set, then we fan out horizontally and collect it as extra notifications
|
||||
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
||||
logger.debug(f'{len(tags)} related to this watch')
|
||||
if tags:
|
||||
for tag_uuid, tag in tags.items():
|
||||
logger.debug(f"Checking group/tag for notification URLs '{tag['title']}' Muted? '{tag.get('notification_muted')}', URLs {tag.get('notification_urls')}")
|
||||
v = tag.get('notification_urls')
|
||||
if v and not tag.get('notification_muted'):
|
||||
logger.debug("OK MAN")
|
||||
next_n_object = deepcopy(n_object)
|
||||
next_n_object['notification_urls'] = v
|
||||
next_n_object['notification_title'] = _check_cascading_vars(self.datastore, 'notification_title', watch)
|
||||
next_n_object['notification_body'] = _check_cascading_vars(self.datastore, 'notification_body', watch)
|
||||
next_n_object['notification_format'] = _check_cascading_vars(self.datastore, 'notification_format', watch)
|
||||
notification_objects.append(next_n_object)
|
||||
logger.debug(f"Adding notification from group/tag {tag['title']}")
|
||||
|
||||
|
||||
# (Individual watch) Only prepare to notify if the rules above matched
|
||||
queued = False
|
||||
if n_object and n_object.get('notification_urls'):
|
||||
if notification_objects:
|
||||
queued = True
|
||||
|
||||
count = watch.get('notification_alert_count', 0) + 1
|
||||
self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})
|
||||
|
||||
self.queue_notification_for_watch(n_object=n_object, watch=watch)
|
||||
for n_object in notification_objects:
|
||||
self.queue_notification_for_watch(n_object=n_object, watch=watch)
|
||||
|
||||
return queued
|
||||
|
||||
|
||||
@@ -151,8 +151,7 @@ class ChangeDetectionSpec:
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def update_finalize(update_handler, watch, datastore, processing_exception,
|
||||
changed_detected=False, snapshot_id=None):
|
||||
def update_finalize(update_handler, watch, datastore, processing_exception):
|
||||
"""Called after watch processing completes (success or failure).
|
||||
|
||||
This hook is called in the finally block after all processing is complete,
|
||||
@@ -169,10 +168,6 @@ class ChangeDetectionSpec:
|
||||
processing_exception: The exception from the main processing block, or None if successful.
|
||||
This does NOT include cleanup exceptions - only exceptions from
|
||||
the actual watch processing (fetch, diff, etc).
|
||||
changed_detected: True when the processor detected a content change (default False).
|
||||
snapshot_id: MD5 hex string of the new snapshot, matches the prefix of the history
|
||||
filename (e.g. 'abc123…' → 'abc123….txt[.br]'). None when no snapshot
|
||||
was saved (first run, error, same content).
|
||||
|
||||
Returns:
|
||||
None: This hook doesn't return a value
|
||||
@@ -585,8 +580,7 @@ def apply_update_handler_alter(update_handler, watch, datastore):
|
||||
return current_handler
|
||||
|
||||
|
||||
def apply_update_finalize(update_handler, watch, datastore, processing_exception,
|
||||
changed_detected=False, snapshot_id=None):
|
||||
def apply_update_finalize(update_handler, watch, datastore, processing_exception):
|
||||
"""Apply update_finalize hooks from all plugins.
|
||||
|
||||
Called in the finally block after watch processing completes, allowing plugins
|
||||
@@ -597,8 +591,6 @@ def apply_update_finalize(update_handler, watch, datastore, processing_exception
|
||||
watch: The watch dict that was processed (may be None)
|
||||
datastore: The application datastore
|
||||
processing_exception: The exception from processing, or None if successful
|
||||
changed_detected: True when the processor detected a content change.
|
||||
snapshot_id: MD5 hex string of the new snapshot, or None.
|
||||
|
||||
Returns:
|
||||
None
|
||||
@@ -609,9 +601,7 @@ def apply_update_finalize(update_handler, watch, datastore, processing_exception
|
||||
update_handler=update_handler,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
processing_exception=processing_exception,
|
||||
changed_detected=changed_detected,
|
||||
snapshot_id=snapshot_id,
|
||||
processing_exception=processing_exception
|
||||
)
|
||||
except Exception as e:
|
||||
# Don't let plugin errors crash the worker
|
||||
|
||||
@@ -9,6 +9,15 @@ Some suggestions for the future
|
||||
|
||||
- `graphical`
|
||||
|
||||
## API schema extension (`api.yaml`)
|
||||
|
||||
A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
|
||||
Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
|
||||
making `processor_config_<name>` a valid field on all watch create/update API calls.
|
||||
The fully merged spec is served live at `/api/v1/full-spec`.
|
||||
|
||||
See `restock_diff/api.yaml` for a working example.
|
||||
|
||||
## Todo
|
||||
|
||||
- Make each processor return an extra list of sub-processors (so you could configure a single processor in different ways)
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import asyncio
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from changedetectionio.browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
from copy import deepcopy
|
||||
from abc import abstractmethod
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
||||
@@ -95,6 +98,23 @@ class difference_detection_processor():
|
||||
self.last_raw_content_checksum = None
|
||||
|
||||
|
||||
async def validate_iana_url(self):
    """Pre-flight SSRF guard: refuse watch URLs whose hostname is private/reserved.

    call_browser() awaits this before starting a fetch, so the same check applies
    whichever fetcher ends up being used. The hostname check is handed to the
    event loop's default executor so the loop itself is not blocked while it runs.

    The check is skipped when the ALLOW_IANA_RESTRICTED_ADDRESSES environment
    variable is truthy, or when the watch link has no hostname component.

    Raises:
        Exception: if is_private_hostname() reports the hostname as
            private/reserved.
    """
    # Operators may switch the restriction off entirely via the environment.
    restrictions_disabled = strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false'))
    if restrictions_disabled:
        return

    hostname = urlparse(self.watch.link).hostname
    if not hostname:
        # Nothing to look up, so there is nothing to block here.
        return

    # None selects the loop's default executor.
    event_loop = asyncio.get_running_loop()
    is_restricted = await event_loop.run_in_executor(None, is_private_hostname, hostname)
    if is_restricted:
        raise Exception(
            f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. "
            f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
        )
|
||||
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
@@ -108,6 +128,8 @@ class difference_detection_processor():
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
await self.validate_iana_url()
|
||||
|
||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
||||
|
||||
@@ -238,6 +260,16 @@ class difference_detection_processor():
|
||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||
await self.fetcher.quit(watch=self.watch)
|
||||
|
||||
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
|
||||
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
|
||||
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
|
||||
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
|
||||
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
|
||||
if self.fetcher.content and isinstance(self.fetcher.content, str):
|
||||
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
def get_extra_watch_config(self, filename):
|
||||
|
||||
@@ -31,6 +31,7 @@ class Restock(dict):
|
||||
|
||||
if standardized_value:
|
||||
# Convert to float
|
||||
# @todo locale needs to be the locale of the webpage
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
return None
|
||||
@@ -67,10 +68,6 @@ class Watch(BaseWatch):
|
||||
super().__init__(*arg, **kw)
|
||||
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
|
||||
|
||||
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing' : 'in_stock_only'
|
||||
} #@todo update
|
||||
|
||||
def clear_watch(self):
|
||||
super().clear_watch()
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
components:
|
||||
schemas:
|
||||
processor_config_restock_diff:
|
||||
type: object
|
||||
description: Configuration for the restock_diff processor (restock and price tracking)
|
||||
properties:
|
||||
in_stock_processing:
|
||||
type: string
|
||||
enum: [in_stock_only, all_changes, 'off']
|
||||
default: in_stock_only
|
||||
description: |
|
||||
When to trigger on stock changes:
|
||||
- `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
|
||||
- `all_changes`: Trigger on any availability change
|
||||
- `off`: Disable stock/availability tracking
|
||||
follow_price_changes:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Monitor and track price changes
|
||||
price_change_min:
|
||||
type: [number, 'null']
|
||||
description: Trigger a notification when the price drops below this value
|
||||
price_change_max:
|
||||
type: [number, 'null']
|
||||
description: Trigger a notification when the price rises above this value
|
||||
price_change_threshold_percent:
|
||||
type: [number, 'null']
|
||||
minimum: 0
|
||||
maximum: 100
|
||||
description: Minimum price change percentage since the original price to trigger a notification
|
||||
|
||||
paths:
|
||||
/watch:
|
||||
post:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Restock & price tracking'
|
||||
source: |
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/product",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Restock & price tracking'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
data = {
|
||||
'url': 'https://example.com/product',
|
||||
'processor': 'restock_diff',
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
'follow_price_changes': True,
|
||||
'price_change_threshold_percent': 5,
|
||||
}
|
||||
}
|
||||
response = requests.post('http://localhost:5000/api/v1/watch',
|
||||
headers=headers, json=data)
|
||||
print(response.json())
|
||||
|
||||
/watch/{uuid}:
|
||||
put:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Update restock config'
|
||||
source: |
|
||||
curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": true,
|
||||
"price_change_min": 10.00,
|
||||
"price_change_max": 500.00
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Update restock config'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
uuid = 'YOUR-UUID'
|
||||
data = {
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'all_changes',
|
||||
'follow_price_changes': True,
|
||||
'price_change_min': 10.00,
|
||||
'price_change_max': 500.00,
|
||||
}
|
||||
}
|
||||
response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
|
||||
headers=headers, json=data)
|
||||
print(response.text)
|
||||
|
||||
/tag/{uuid}:
|
||||
put:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Set restock config on group/tag'
|
||||
source: |
|
||||
curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"overrides_watch": true,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 10
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Set restock config on group/tag'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
tag_uuid = 'YOUR-TAG-UUID'
|
||||
data = {
|
||||
'overrides_watch': True,
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
'follow_price_changes': True,
|
||||
'price_change_threshold_percent': 10,
|
||||
}
|
||||
}
|
||||
response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
|
||||
headers=headers, json=data)
|
||||
print(response.text)
|
||||
@@ -31,7 +31,7 @@ class RestockSettingsForm(Form):
|
||||
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
|
||||
restock_settings = FormField(RestockSettingsForm)
|
||||
processor_config_restock_diff = FormField(RestockSettingsForm)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return _l('Restock & Price Detection')
|
||||
@@ -48,34 +48,34 @@ class processor_settings_form(processor_text_json_diff_form):
|
||||
|
||||
output += """
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
<script>
|
||||
<script>
|
||||
$(document).ready(function () {
|
||||
toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true);
|
||||
toggleOpacity('#processor_config_restock_diff-follow_price_changes', '.price-change-minmax', true);
|
||||
});
|
||||
</script>
|
||||
|
||||
<fieldset id="restock-fieldset-price-group">
|
||||
<div class="pure-control-group">
|
||||
<fieldset class="pure-group inline-radio">
|
||||
{{ render_field(form.restock_settings.in_stock_processing) }}
|
||||
{{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.restock_settings.follow_price_changes) }}
|
||||
{{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }}
|
||||
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
|
||||
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
|
||||
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
|
||||
</fieldset>
|
||||
</fieldset>
|
||||
</div>
|
||||
</fieldset>
|
||||
"""
|
||||
|
||||
@@ -437,26 +437,32 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||
from ...html_tools import html_to_text
|
||||
text = html_to_text(self.fetcher.content)
|
||||
logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
if not len(text):
|
||||
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
raise ReplyWithContentButNoText(url=watch.link,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
html_content=self.fetcher.content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
#useless
|
||||
# from ...html_tools import html_to_text
|
||||
# text = html_to_text(self.fetcher.content)
|
||||
# logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
# if not len(text):
|
||||
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
# raise ReplyWithContentButNoText(url=watch.link,
|
||||
# status_code=self.fetcher.get_last_status_code(),
|
||||
# screenshot=self.fetcher.screenshot,
|
||||
# html_content=self.fetcher.content,
|
||||
# xpath_data=self.fetcher.xpath_data
|
||||
# )
|
||||
|
||||
# Which restock settings to compare against?
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
|
||||
_extra_config = self.get_extra_watch_config('restock_diff.json')
|
||||
restock_settings = _extra_config.get('restock_diff') or {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
}
|
||||
|
||||
# See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
restock_settings = tag.get('processor_config_restock_diff') or {}
|
||||
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
|
||||
break
|
||||
|
||||
|
||||
@@ -283,4 +283,7 @@ def query_price_availability(extracted_data):
|
||||
if not result.get('availability') and 'availability' in microdata:
|
||||
result['availability'] = microdata['availability']
|
||||
|
||||
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
|
||||
# using something like babel you need to know the locale of the website and even then it can be problematic
|
||||
# we don't really do anything with the price data so far, so just accept it the way it comes.
|
||||
return result
|
||||
|
||||
@@ -198,6 +198,7 @@ def handle_watch_update(socketio, **kwargs):
|
||||
except Exception as e:
|
||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||
|
||||
|
||||
def init_socketio(app, datastore):
|
||||
"""Initialize SocketIO with the main Flask app"""
|
||||
import platform
|
||||
@@ -344,4 +345,4 @@ def init_socketio(app, datastore):
|
||||
|
||||
logger.info("Socket.IO initialized and attached to main Flask app")
|
||||
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
|
||||
return socketio
|
||||
return socketio
|
||||
|
||||
@@ -1,187 +0,0 @@
|
||||
/* llm.js — LLM Connections management (settings page)
|
||||
* Depends on: jQuery (global), LLM_CONNECTIONS + LLM_I18N injected by Jinja2 template.
|
||||
*/
|
||||
(function ($) {
|
||||
'use strict';
|
||||
|
||||
// Provider presets: [value, label, model, api_base, tpm]
|
||||
// tpm = tokens-per-minute limit (0 = unlimited / local).
|
||||
// Defaults reflect free-tier or conservative tier-1 limits.
|
||||
var LLM_PRESETS = [
|
||||
['openai-mini', 'OpenAI — gpt-4o-mini', 'gpt-4o-mini', '', 200000],
|
||||
['openai-4o', 'OpenAI — gpt-4o', 'gpt-4o', '', 30000],
|
||||
['anthropic-haiku', 'Anthropic — claude-3-haiku', 'anthropic/claude-3-haiku-20240307', '', 100000],
|
||||
['anthropic-sonnet', 'Anthropic — claude-3-5-sonnet', 'anthropic/claude-3-5-sonnet-20241022', '', 40000],
|
||||
['groq-8b', 'Groq — llama-3.1-8b-instant', 'groq/llama-3.1-8b-instant', '', 6000],
|
||||
['groq-70b', 'Groq — llama-3.3-70b-versatile', 'groq/llama-3.3-70b-versatile', '', 6000],
|
||||
['gemini-flash', 'Google — gemini-1.5-flash', 'gemini/gemini-1.5-flash', '', 1000000],
|
||||
['mistral-small', 'Mistral — mistral-small', 'mistral/mistral-small-latest', '', 500000],
|
||||
['deepseek', 'DeepSeek — deepseek-chat', 'deepseek/deepseek-chat', '', 50000],
|
||||
['openrouter', 'OpenRouter (custom model)', 'openrouter/', '', 20000],
|
||||
['ollama-llama', 'Ollama — llama3.1 (local)', 'ollama/llama3.1', 'http://localhost:11434', 0],
|
||||
['ollama-mistral', 'Ollama — mistral (local)', 'ollama/mistral', 'http://localhost:11434', 0],
|
||||
['lmstudio', 'LM Studio (local)', 'openai/local', 'http://localhost:1234/v1', 0],
|
||||
];
|
||||
|
||||
var presetMap = {};
|
||||
$.each(LLM_PRESETS, function (_, p) { presetMap[p[0]] = p; });
|
||||
|
||||
function escHtml(s) {
|
||||
return $('<div>').text(String(s)).html();
|
||||
}
|
||||
|
||||
function maskKey(k) {
|
||||
if (!k) return '<span style="color:var(--color-grey-700)">—</span>';
|
||||
return escHtml(k.substring(0, 4)) + '••••';
|
||||
}
|
||||
|
||||
// Emit WTForms FieldList hidden inputs (llm_connection-N-fieldname) so the
|
||||
// server processes connections through the declared schema — no arbitrary keys.
|
||||
function serialise() {
|
||||
var $form = $('form.settings');
|
||||
$form.find('input[data-llm-gen]').remove();
|
||||
|
||||
var ids = Object.keys(LLM_CONNECTIONS);
|
||||
$.each(ids, function (i, id) {
|
||||
var c = LLM_CONNECTIONS[id];
|
||||
var prefix = 'llm_connection-' + i + '-';
|
||||
var fields = {
|
||||
connection_id: id,
|
||||
name: c.name || '',
|
||||
model: c.model || '',
|
||||
api_key: c.api_key || '',
|
||||
api_base: c.api_base || '',
|
||||
tokens_per_minute: parseInt(c.tokens_per_minute || 0, 10)
|
||||
};
|
||||
$.each(fields, function (field, value) {
|
||||
$('<input>').attr({ type: 'hidden', name: prefix + field, value: value, 'data-llm-gen': '1' }).appendTo($form);
|
||||
});
|
||||
// BooleanField: only emit when true (absence == false in WTForms)
|
||||
if (c.is_default) {
|
||||
$('<input>').attr({ type: 'hidden', name: prefix + 'is_default', value: 'y', 'data-llm-gen': '1' }).appendTo($form);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function renderTable() {
|
||||
var $tbody = $('#llm-connections-tbody');
|
||||
$tbody.empty();
|
||||
var ids = Object.keys(LLM_CONNECTIONS);
|
||||
if (!ids.length) {
|
||||
$tbody.html('<tr class="llm-empty"><td colspan="6">' + escHtml(LLM_I18N.noConnections) + '</td></tr>');
|
||||
return;
|
||||
}
|
||||
$.each(ids, function (_, id) {
|
||||
var c = LLM_CONNECTIONS[id];
|
||||
var tpm = parseInt(c.tokens_per_minute || 0, 10);
|
||||
var tpmLabel = tpm ? tpm.toLocaleString() : '<span style="color:var(--color-grey-700)">∞</span>';
|
||||
$tbody.append(
|
||||
'<tr>' +
|
||||
'<td class="llm-col-def">' +
|
||||
'<input type="radio" class="llm-default-radio" name="llm_default_radio"' +
|
||||
' title="' + escHtml(LLM_I18N.setDefault) + '"' +
|
||||
(c.is_default ? ' checked' : '') +
|
||||
' data-id="' + escHtml(id) + '">' +
|
||||
'</td>' +
|
||||
'<td class="llm-col-name">' + escHtml(c.name) + '</td>' +
|
||||
'<td class="llm-col-model">' + escHtml(c.model) + '</td>' +
|
||||
'<td class="llm-col-key">' + maskKey(c.api_key) + '</td>' +
|
||||
'<td class="llm-col-tpm">' + tpmLabel + '</td>' +
|
||||
'<td class="llm-col-del">' +
|
||||
'<button type="button" class="llm-del"' +
|
||||
' title="' + escHtml(LLM_I18N.remove) + '"' +
|
||||
' data-id="' + escHtml(id) + '">×</button>' +
|
||||
'</td>' +
|
||||
'</tr>'
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
$(function () {
|
||||
// Event delegation on tbody — survives re-renders
|
||||
$('#llm-connections-tbody')
|
||||
.on('change', '.llm-default-radio', function () {
|
||||
var chosen = String($(this).data('id'));
|
||||
$.each(LLM_CONNECTIONS, function (k) {
|
||||
LLM_CONNECTIONS[k].is_default = (k === chosen);
|
||||
});
|
||||
serialise();
|
||||
})
|
||||
.on('click', '.llm-del', function () {
|
||||
var id = String($(this).data('id'));
|
||||
delete LLM_CONNECTIONS[id];
|
||||
var remaining = Object.keys(LLM_CONNECTIONS);
|
||||
if (remaining.length && !remaining.some(function (k) { return LLM_CONNECTIONS[k].is_default; })) {
|
||||
LLM_CONNECTIONS[remaining[0]].is_default = true;
|
||||
}
|
||||
renderTable();
|
||||
serialise();
|
||||
});
|
||||
|
||||
function updateBaseVisibility() {
|
||||
var val = $('#llm-preset').val();
|
||||
var preset = presetMap[val];
|
||||
var hasBase = preset ? !!preset[3] : (val === 'custom');
|
||||
var show = (val === 'custom') || hasBase;
|
||||
$('#llm-base-group').toggle(show);
|
||||
}
|
||||
|
||||
// Preset dropdown pre-fills add form
|
||||
$('#llm-preset').on('change', function () {
|
||||
var val = $(this).val();
|
||||
var p = presetMap[val];
|
||||
if (p) {
|
||||
$('#llm-add-name').val(p[1].replace(/\s*—.*/, '').trim());
|
||||
$('#llm-add-model').val(p[2]);
|
||||
$('#llm-add-base').val(p[3]);
|
||||
$('#llm-add-tpm').val(p[4] !== undefined ? p[4] : 0);
|
||||
$('#llm-add-key').val('');
|
||||
}
|
||||
updateBaseVisibility();
|
||||
});
|
||||
|
||||
// Add connection
|
||||
$('#llm-btn-add').on('click', function () {
|
||||
var name = $.trim($('#llm-add-name').val());
|
||||
var model = $.trim($('#llm-add-model').val());
|
||||
var key = $.trim($('#llm-add-key').val());
|
||||
var base = $.trim($('#llm-add-base').val());
|
||||
var tpm = parseInt($('#llm-add-tpm').val(), 10) || 0;
|
||||
if (!name || !model) {
|
||||
alert(LLM_I18N.nameModelRequired);
|
||||
return;
|
||||
}
|
||||
var id = 'llm-' + Date.now();
|
||||
var isFirst = !Object.keys(LLM_CONNECTIONS).length;
|
||||
LLM_CONNECTIONS[id] = {
|
||||
name: name, model: model, api_key: key, api_base: base,
|
||||
tokens_per_minute: tpm, is_default: isFirst
|
||||
};
|
||||
$('#llm-preset, #llm-add-name, #llm-add-model, #llm-add-key, #llm-add-base').val('');
|
||||
$('#llm-add-tpm').val('0');
|
||||
$('#llm-base-group').hide();
|
||||
renderTable();
|
||||
serialise();
|
||||
});
|
||||
|
||||
// Show/hide API key visibility
|
||||
$('#llm-key-toggle').on('click', function () {
|
||||
var $inp = $('#llm-add-key');
|
||||
if ($inp.attr('type') === 'password') {
|
||||
$inp.attr('type', 'text');
|
||||
$(this).text(LLM_I18N.hide);
|
||||
} else {
|
||||
$inp.attr('type', 'password');
|
||||
$(this).text(LLM_I18N.show);
|
||||
}
|
||||
});
|
||||
|
||||
// Serialise connections to hidden field before form submit
|
||||
$('form.settings').on('submit', serialise);
|
||||
|
||||
// Init
|
||||
renderTable();
|
||||
serialise();
|
||||
});
|
||||
|
||||
}(jQuery));
|
||||
@@ -1,57 +0,0 @@
|
||||
#llm {
|
||||
// ── Key field wrapper — input + show/hide toggle inline ───────────────
|
||||
.llm-key-wrap {
|
||||
display: flex;
|
||||
gap: 0.3em;
|
||||
align-items: center;
|
||||
|
||||
input { flex: 1; min-width: 0; }
|
||||
|
||||
button {
|
||||
flex: 0 0 auto;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Pure-grid column padding consistency ──────────────────────────────
|
||||
.pure-u-md-1-2 {
|
||||
.pure-control-group {
|
||||
padding-right: 1em;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Connections table ─────────────────────────────────────────────────
|
||||
table.llm-connections {
|
||||
width: 100%;
|
||||
|
||||
.llm-col-def { width: 3em; text-align: center; }
|
||||
.llm-col-name { font-weight: 500; }
|
||||
.llm-col-model { font-family: monospace; font-size: 0.85em; color: var(--color-grey-400); }
|
||||
.llm-col-key {
|
||||
font-family: monospace; font-size: 0.82em; color: var(--color-grey-600);
|
||||
max-width: 140px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
|
||||
}
|
||||
.llm-col-del { width: 2.5em; text-align: center; }
|
||||
|
||||
.llm-del {
|
||||
background: none;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
color: var(--color-grey-600);
|
||||
padding: 0.15em 0.4em;
|
||||
border-radius: 3px;
|
||||
font-size: 1.1em;
|
||||
line-height: 1;
|
||||
&:hover { color: var(--color-dark-red); background: #ffeaea; }
|
||||
}
|
||||
|
||||
.llm-empty td {
|
||||
text-align: center;
|
||||
color: var(--color-grey-600);
|
||||
padding: 1.8em;
|
||||
font-style: italic;
|
||||
font-size: 0.92em;
|
||||
}
|
||||
}
|
||||
|
||||
.llm-default-radio { cursor: pointer; }
|
||||
}
|
||||
@@ -32,7 +32,6 @@
|
||||
@use "parts/toast";
|
||||
@use "parts/login_form";
|
||||
@use "parts/tabs";
|
||||
@use "parts/llm";
|
||||
|
||||
// Smooth transitions for theme switching
|
||||
body,
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -730,3 +730,48 @@ class DatastoreUpdatesMixin:
|
||||
# (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
|
||||
self._save_settings()
|
||||
|
||||
def update_30(self):
    """Migrate restock_settings out of watch.json into restock_diff.json processor config file.

    Previously, restock_diff processor settings (in_stock_processing, follow_price_changes, etc.)
    were stored directly in the watch dict (watch.json). They now belong in a separate per-watch
    processor config file (restock_diff.json) consistent with the processor_config_* API system.

    For tags: restock_settings key is renamed to processor_config_restock_diff in the tag dict,
    matching what the API writes when updating a tag.

    Safe to re-run: skips watches that already have a restock_diff.json, skips tags that already
    have processor_config_restock_diff set.

    The legacy restock_settings key is only removed from a watch once a restock_diff.json
    exists on disk for it. A watch without a data directory keeps its legacy key, so the
    settings are never dropped without having been written somewhere.
    """
    import json

    # --- Watches ---
    for uuid, watch in self.data['watching'].items():
        if watch.get('processor') != 'restock_diff':
            continue
        restock_settings = watch.get('restock_settings')
        if not restock_settings:
            continue

        data_dir = watch.data_dir
        if not data_dir:
            # Nowhere to write restock_diff.json: deleting the key here would discard
            # the only copy of the user's settings, so leave the watch untouched.
            logger.warning(f"update_30: watch {uuid} has no data directory, keeping restock_settings in watch.json")
            continue

        watch.ensure_data_dir_exists()
        filepath = os.path.join(data_dir, 'restock_diff.json')
        if not os.path.isfile(filepath):
            # Write to a temp file and rename it into place, so an interrupted write can
            # never leave a truncated restock_diff.json that a re-run would then treat
            # as "already migrated".
            tmp_filepath = filepath + '.tmp'
            with open(tmp_filepath, 'w', encoding='utf-8') as f:
                json.dump({'restock_diff': restock_settings}, f, indent=2)
            os.replace(tmp_filepath, filepath)
            logger.info(f"update_30: migrated restock_settings → {filepath}")

        # restock_diff.json now exists (just written, or left from an earlier run),
        # so the legacy copy in watch.json can go.
        del self.data['watching'][uuid]['restock_settings']
        watch.commit()

    # --- Tags ---
    for tag_uuid, tag in self.data['settings']['application']['tags'].items():
        restock_settings = tag.get('restock_settings')
        # Skip tags with nothing to migrate, and never overwrite a config already
        # stored under the new key.
        if not restock_settings or tag.get('processor_config_restock_diff'):
            continue
        tag['processor_config_restock_diff'] = restock_settings
        del tag['restock_settings']
        tag.commit()
        logger.info(f"update_30: migrated tag {tag_uuid} restock_settings → processor_config_restock_diff")
|
||||
|
||||
|
||||
@@ -58,7 +58,12 @@
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff}}' }}</code></td>
|
||||
<td>{{ _('The diff output - only changes, additions, and removals') }}</td>
|
||||
<td>{{ _('The diff output - only changes, additions, and removals') }}<br>
|
||||
<small>
|
||||
{{ _('All diff variants accept') }} <code>lines=</code>, <code>context=</code>, <code>word_diff=</code>, <code>ignore_junk=</code> {{ _('args, e.g.') }}
|
||||
<code>{{ '{{diff(lines=10)}}' }}</code>, <code>{{ '{{diff_added(lines=5, context=2)}}' }}</code>
|
||||
</small>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_clean}}' }}</code></td>
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import psutil
|
||||
import time
|
||||
from threading import Thread
|
||||
import multiprocessing
|
||||
|
||||
import pytest
|
||||
import arrow
|
||||
@@ -191,6 +192,34 @@ def cleanup(datastore_path):
|
||||
if os.path.isfile(f):
|
||||
os.unlink(f)
|
||||
|
||||
def pytest_configure(config):
    """Pin the multiprocessing start method to 'fork' before any test runs.

    CRITICAL for Python 3.14+ compatibility: Python 3.14 changed the default start
    method on Linux from 'fork' to 'forkserver'. The forkserver method requires all
    objects to be picklable, but pytest-flask's LiveServer uses nested functions
    that can't be pickled.

    Requesting 'fork' explicitly:
    - Keeps Python 3.10-3.13 behaving as before (where 'fork' was already the default)
    - Fixes the Python 3.14 pickling errors
    - Only matters on Unix-like systems; where 'fork' is unavailable the request
      is rejected and silently ignored

    A start method that has already been chosen is never overridden.

    See: https://github.com/python/cpython/issues/126831
    See: https://docs.python.org/3/whatsnew/3.14.html
    """
    # Respect any start method that was configured before pytest got here.
    if multiprocessing.get_start_method(allow_none=True) is not None:
        return

    try:
        # force=False: do not replace a method fixed by someone else in the meantime.
        multiprocessing.set_start_method('fork', force=False)
        logger.debug("Set multiprocessing start method to 'fork' for Python 3.14+ compatibility")
    except (ValueError, RuntimeError):
        # Already set, not available on this platform, or context already created
        pass
|
||||
|
||||
def pytest_addoption(parser):
|
||||
"""Add custom command-line options for pytest.
|
||||
|
||||
@@ -253,14 +282,13 @@ def prepare_test_function(live_server, datastore_path):
|
||||
# CRITICAL: Get datastore and stop it from writing stale data
|
||||
datastore = live_server.app.config.get('DATASTORE')
|
||||
|
||||
# Clear the queues before starting the test to prevent state leakage
|
||||
from changedetectionio.flask_app import update_q, llm_summary_q
|
||||
for q in (update_q, llm_summary_q):
|
||||
while not q.empty():
|
||||
try:
|
||||
q.get_nowait()
|
||||
except:
|
||||
break
|
||||
# Clear the queue before starting the test to prevent state leakage
|
||||
from changedetectionio.flask_app import update_q
|
||||
while not update_q.empty():
|
||||
try:
|
||||
update_q.get_nowait()
|
||||
except:
|
||||
break
|
||||
|
||||
# Add test helper methods to the app for worker management
|
||||
def set_workers(count):
|
||||
|
||||
@@ -807,6 +807,88 @@ def test_api_import_large_background(client, live_server, measure_memory_usage,
|
||||
print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
|
||||
|
||||
|
||||
def test_api_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that processor_config_restock_diff is accepted by the API for watches using
|
||||
restock_diff processor, that its schema is validated (enum values, types), and that
|
||||
genuinely unknown fields are rejected with an error that originates from the
|
||||
OpenAPI spec validation layer.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create a watch in restock_diff mode WITH processor_config in the POST body (matches the API docs example)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"processor": "restock_diff",
|
||||
"title": "Restock test",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": True,
|
||||
"price_change_min": 8888888.0,
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 201
|
||||
watch_uuid = res.json.get('uuid')
|
||||
assert is_valid_uuid(watch_uuid)
|
||||
|
||||
# Verify the value set on POST is reflected in the UI edit page (not just via PUT)
|
||||
res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
|
||||
assert res.status_code == 200
|
||||
assert b'8888888' in res.data, "price_change_min set via POST should appear in the UI edit form"
|
||||
|
||||
# Valid processor_config_restock_diff update via PUT should also be accepted
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": False,
|
||||
"price_change_min": 8888888.0,
|
||||
"price_change_max": 9999999.0,
|
||||
}
|
||||
}),
|
||||
)
|
||||
assert res.status_code == 200, f"Valid processor_config_restock_diff should be accepted, got: {res.data}"
|
||||
|
||||
# Verify the updated value is still reflected in the UI edit page
|
||||
res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
|
||||
assert res.status_code == 200
|
||||
assert b'8888888' in res.data, "price_change_min set via PUT should appear in the UI edit form"
|
||||
|
||||
# An invalid enum value inside processor_config_restock_diff should be rejected by the spec
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "not_a_valid_enum_value"
|
||||
}
|
||||
}),
|
||||
)
|
||||
assert res.status_code == 400, "Invalid enum value in processor config should be rejected"
|
||||
assert b'Validation failed' in res.data, "Rejection should come from OpenAPI spec validation layer"
|
||||
|
||||
# A completely unknown field should be rejected (either by OpenAPI spec validation or
|
||||
# the application-level field filter — both are acceptable gatekeepers)
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"field_that_is_not_in_the_spec_at_all": "some value"}),
|
||||
)
|
||||
assert res.status_code == 400, "Unknown fields should be rejected"
|
||||
assert (b'Validation failed' in res.data or b'Unknown field' in res.data), \
|
||||
"Rejection should come from either the OpenAPI spec validation layer or application field filter"
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
|
||||
@@ -12,6 +12,50 @@ from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
|
||||
|
||||
|
||||
def test_openapi_merged_spec_contains_restock_fields():
|
||||
"""
|
||||
Unit test: verify that build_merged_spec_dict() correctly merges the
|
||||
restock_diff processor api.yaml into the base spec so that
|
||||
WatchBase.properties includes processor_config_restock_diff with all
|
||||
expected sub-fields. No live server required.
|
||||
"""
|
||||
from changedetectionio.api import build_merged_spec_dict
|
||||
|
||||
spec = build_merged_spec_dict()
|
||||
schemas = spec['components']['schemas']
|
||||
|
||||
# The merged schema for processor_config_restock_diff should exist
|
||||
assert 'processor_config_restock_diff' in schemas, \
|
||||
"processor_config_restock_diff schema missing from merged spec"
|
||||
|
||||
restock_schema = schemas['processor_config_restock_diff']
|
||||
props = restock_schema.get('properties', {})
|
||||
|
||||
expected_fields = {
|
||||
'in_stock_processing',
|
||||
'follow_price_changes',
|
||||
'price_change_min',
|
||||
'price_change_max',
|
||||
'price_change_threshold_percent',
|
||||
}
|
||||
missing = expected_fields - set(props.keys())
|
||||
assert not missing, f"Missing fields in processor_config_restock_diff schema: {missing}"
|
||||
|
||||
# in_stock_processing must be an enum with the three valid values
|
||||
enum_values = set(props['in_stock_processing'].get('enum', []))
|
||||
assert enum_values == {'in_stock_only', 'all_changes', 'off'}, \
|
||||
f"Unexpected enum values for in_stock_processing: {enum_values}"
|
||||
|
||||
# WatchBase.properties must carry a $ref to the restock schema so the
|
||||
# validation middleware can enforce it on every POST/PUT to /watch
|
||||
watchbase_props = schemas['WatchBase']['properties']
|
||||
assert 'processor_config_restock_diff' in watchbase_props, \
|
||||
"processor_config_restock_diff not wired into WatchBase.properties"
|
||||
ref = watchbase_props['processor_config_restock_diff'].get('$ref', '')
|
||||
assert 'processor_config_restock_diff' in ref, \
|
||||
f"WatchBase.processor_config_restock_diff should $ref the schema, got: {ref}"
|
||||
|
||||
|
||||
def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that creating a watch with invalid content-type triggers OpenAPI validation error."""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
@@ -176,6 +176,97 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
assert res.status_code == 204
|
||||
|
||||
|
||||
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that a tag/group can be created and updated with processor_config_restock_diff via the API.
|
||||
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
# Create a tag with processor_config_restock_diff in a single POST (issue #3966)
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({
|
||||
"title": "Restock Group",
|
||||
"overrides_watch": True,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": True,
|
||||
"price_change_min": 7777777
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}"
|
||||
tag_uuid = res.json.get('uuid')
|
||||
|
||||
# Verify processor config was saved during creation (the bug: these were discarded)
|
||||
res = client.get(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
tag_data = res.json
|
||||
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
|
||||
"processor_config_restock_diff should be saved on POST"
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
|
||||
"price_change_min should be saved on POST"
|
||||
|
||||
# Update tag with valid processor_config_restock_diff via PUT
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({
|
||||
"overrides_watch": True,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": True,
|
||||
"price_change_min": 8888888
|
||||
}
|
||||
})
|
||||
)
|
||||
assert res.status_code == 200, f"PUT tag with restock config failed: {res.data}"
|
||||
|
||||
# Verify the config was stored via API
|
||||
res = client.get(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
tag_data = res.json
|
||||
assert tag_data.get('overrides_watch') == True
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only'
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 8888888
|
||||
|
||||
# Verify the value is also reflected in the UI tag edit page
|
||||
res = client.get(url_for("tags.form_tag_edit", uuid=tag_uuid))
|
||||
assert res.status_code == 200
|
||||
assert b'8888888' in res.data, "price_change_min set via API should appear in the UI tag edit form"
|
||||
|
||||
# Invalid enum value should be rejected by OpenAPI spec validation
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "not_a_valid_value"
|
||||
}
|
||||
})
|
||||
)
|
||||
assert res.status_code == 400
|
||||
assert b'Validation failed' in res.data
|
||||
|
||||
# Clean up
|
||||
res = client.delete(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 204
|
||||
|
||||
|
||||
def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test the full round trip, this way we test the default Model fits back into OpenAPI spec
|
||||
|
||||
@@ -48,6 +48,15 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
# Check this class does not appear (that we didnt see the actual source)
|
||||
assert b'foobar-detection' not in res.data
|
||||
|
||||
# Check POST preview
|
||||
res = client.post(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
# Check this class does not appear (that we didnt see the actual source)
|
||||
assert b'foobar-detection' not in res.data
|
||||
|
||||
|
||||
# Make a change
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
from .util import set_original_response, live_server_setup, wait_for_all_checks
|
||||
from flask import url_for
|
||||
import io
|
||||
from zipfile import ZipFile
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
import re
|
||||
import time
|
||||
from changedetectionio.model import Watch, Tag
|
||||
@@ -68,6 +68,9 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Check for changedetection.json (settings file)
|
||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||
|
||||
# secret.txt must never be included — it contains the Flask session key
|
||||
assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
|
||||
|
||||
# Get the latest one
|
||||
res = client.get(
|
||||
url_for("backups.remove_backups"),
|
||||
@@ -196,4 +199,63 @@ def test_backup_restore(client, live_server, measure_memory_usage, datastore_pat
|
||||
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
|
||||
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
|
||||
assert isinstance(restored_tag2, Tag.model), \
|
||||
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||
|
||||
|
||||
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Zip Slip path traversal entries in a restore zip must be rejected."""
|
||||
import pytest
|
||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||
|
||||
# Build a zip with a path traversal entry that would escape the extraction dir
|
||||
malicious_zip = io.BytesIO()
|
||||
with ZipFile(malicious_zip, 'w') as zf:
|
||||
zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
|
||||
malicious_zip.seek(0)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
|
||||
with pytest.raises(ValueError, match="Zip Slip"):
|
||||
import_from_zip(
|
||||
zip_stream=malicious_zip,
|
||||
datastore=datastore,
|
||||
include_groups=True,
|
||||
include_groups_replace=True,
|
||||
include_watches=True,
|
||||
include_watches_replace=True,
|
||||
)
|
||||
|
||||
|
||||
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""A zip whose total uncompressed size exceeds the limit must be rejected.
|
||||
|
||||
The guard reads file_size from the zip central-directory metadata — no
|
||||
actual decompression happens, so this test is fast and uses minimal RAM.
|
||||
100 KB of zeros compresses to ~100 bytes; monkeypatching the limit to
|
||||
50 KB is enough to trigger the check without creating any large files.
|
||||
"""
|
||||
import pytest
|
||||
import changedetectionio.blueprint.backups.restore as restore_mod
|
||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||
|
||||
# ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
|
||||
bomb_zip = io.BytesIO()
|
||||
with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
|
||||
zf.writestr("data.txt", b"\x00" * (100 * 1024))
|
||||
bomb_zip.seek(0)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
|
||||
try:
|
||||
restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024 # 50 KB limit for this test
|
||||
with pytest.raises(ValueError, match="decompressed size"):
|
||||
import_from_zip(
|
||||
zip_stream=bomb_zip,
|
||||
datastore=datastore,
|
||||
include_groups=True,
|
||||
include_groups_replace=True,
|
||||
include_watches=True,
|
||||
include_watches_replace=True,
|
||||
)
|
||||
finally:
|
||||
restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
@@ -11,6 +12,69 @@ import os
|
||||
|
||||
|
||||
|
||||
def test_surrogate_characters_in_content_are_sanitized():
|
||||
"""Lone surrogates can appear in requests' r.text when a server returns malformed/mixed-encoding
|
||||
content. Without sanitization, encoding to UTF-8 raises UnicodeEncodeError.
|
||||
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
content_with_surrogate = '<html><body>Hello \udcad World</body></html>'
|
||||
|
||||
# Confirm the raw problem exists
|
||||
with pytest.raises(UnicodeEncodeError):
|
||||
content_with_surrogate.encode('utf-8')
|
||||
|
||||
# Our fix: sanitize after fetcher.run() in processors/base.py call_browser()
|
||||
sanitized = content_with_surrogate.encode('utf-8', errors='replace').decode('utf-8')
|
||||
assert 'Hello' in sanitized
|
||||
assert 'World' in sanitized
|
||||
assert '\udcad' not in sanitized
|
||||
|
||||
# Checksum computation (processors/base.py get_raw_document_checksum) must not crash
|
||||
hashlib.md5(sanitized.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def test_utf8_content_without_charset_header(client, live_server, datastore_path):
|
||||
"""Server returns UTF-8 content but no charset in Content-Type header.
|
||||
chardet can misdetect such pages as UTF-7 (Python 3.14 then produces surrogates).
|
||||
Our fix tries UTF-8 first before falling back to chardet.
|
||||
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
from .util import write_test_file_and_sync
|
||||
# UTF-8 encoded content with non-ASCII chars - no charset will be in the header
|
||||
html = '<html><body><p>Español</p><p>Français</p><p>日本語</p></body></html>'
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('utf-8'), mode='wb')
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
|
||||
# Should decode correctly as UTF-8, not produce mojibake (Español) or replacement chars
|
||||
assert 'Español'.encode('utf-8') in res.data
|
||||
assert 'Français'.encode('utf-8') in res.data
|
||||
assert '日本語'.encode('utf-8') in res.data
|
||||
|
||||
|
||||
def test_shiftjis_with_meta_charset(client, live_server, datastore_path):
|
||||
"""Server returns Shift-JIS content with no charset in HTTP header, but the HTML
|
||||
declares <meta charset="Shift-JIS">. We should use the meta tag, not chardet.
|
||||
Real-world case: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
from .util import write_test_file_and_sync
|
||||
japanese_text = '日本語のページ'
|
||||
html = f'<html><head><meta http-equiv="Content-Type" content="text/html;charset=Shift-JIS"></head><body><p>{japanese_text}</p></body></html>'
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('shift_jis'), mode='wb')
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
|
||||
assert japanese_text.encode('utf-8') in res.data
|
||||
|
||||
|
||||
def set_html_response(datastore_path):
|
||||
test_return_data = """
|
||||
<html><body><span class="nav_second_img_text">
|
||||
|
||||
@@ -171,6 +171,7 @@ def test_group_tag_notification(client, live_server, measure_memory_usage, datas
|
||||
delete_all_watches(client)
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
notification_url_endpoint = url_for('test_notification_endpoint', _external=True).replace('http', 'post')
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
@@ -181,35 +182,50 @@ def test_group_tag_notification(client, live_server, measure_memory_usage, datas
|
||||
|
||||
assert b"Watch added" in res.data
|
||||
|
||||
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
|
||||
notification_form_data = {"notification_urls": notification_url,
|
||||
"notification_title": "New GROUP TAG ChangeDetection.io Notification - {{watch_url}}",
|
||||
"notification_body": "BASE URL: {{base_url}}\n"
|
||||
"Watch URL: {{watch_url}}\n"
|
||||
"Watch UUID: {{watch_uuid}}\n"
|
||||
"Watch title: {{watch_title}}\n"
|
||||
"Watch tag: {{watch_tag}}\n"
|
||||
"Preview: {{preview_url}}\n"
|
||||
"Diff URL: {{diff_url}}\n"
|
||||
"Snapshot: {{current_snapshot}}\n"
|
||||
"Diff: {{diff}}\n"
|
||||
"Diff Added: {{diff_added}}\n"
|
||||
"Diff Removed: {{diff_removed}}\n"
|
||||
"Diff Full: {{diff_full}}\n"
|
||||
"Diff as Patch: {{diff_patch}}\n"
|
||||
":-)",
|
||||
"notification_screenshot": True,
|
||||
"notification_format": 'text',
|
||||
"title": "test-tag"}
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
group_tag_form_data = {
|
||||
"notification_title": "New GROUP TAG ChangeDetection.io Notification - {{watch_url}}",
|
||||
"notification_body": "BASE URL: {{base_url}}\n"
|
||||
"Watch URL: {{watch_url}}\n"
|
||||
"Watch UUID: {{watch_uuid}}\n"
|
||||
"Watch title: {{watch_title}}\n"
|
||||
"Watch tag: {{watch_tag}}\n"
|
||||
"Preview: {{preview_url}}\n"
|
||||
"Diff URL: {{diff_url}}\n"
|
||||
"Snapshot: {{current_snapshot}}\n"
|
||||
"Diff: {{diff}}\n"
|
||||
"Diff Added: {{diff_added}}\n"
|
||||
"Diff Removed: {{diff_removed}}\n"
|
||||
"Diff Full: {{diff_full}}\n"
|
||||
"Diff as Patch: {{diff_patch}}\n"
|
||||
":-)",
|
||||
"notification_screenshot": True,
|
||||
"notification_format": 'text',
|
||||
}
|
||||
|
||||
# Setup for test-tag
|
||||
group_tag_form_data['notification_urls'] = notification_url_endpoint+"?outputfilename=test-tag.txt"
|
||||
group_tag_form_data['title'] = 'test-tag'
|
||||
res = client.post(
|
||||
url_for("tags.form_tag_edit_submit", uuid=get_UUID_for_tag_name(client, name="test-tag")),
|
||||
data=notification_form_data,
|
||||
data=group_tag_form_data,
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated" in res.data
|
||||
|
||||
# Setup for other-tag, we only add notifications-urls
|
||||
group_tag_form_data['notification_urls'] = notification_url_endpoint+"?outputfilename=other-tag.txt"
|
||||
group_tag_form_data['title'] = 'other-tag'
|
||||
|
||||
res = client.post(
|
||||
url_for("tags.form_tag_edit_submit", uuid=get_UUID_for_tag_name(client, name="other-tag")),
|
||||
data=group_tag_form_data,
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated" in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
@@ -217,12 +233,14 @@ def test_group_tag_notification(client, live_server, measure_memory_usage, datas
|
||||
|
||||
time.sleep(3)
|
||||
|
||||
assert os.path.isfile(os.path.join(datastore_path, "notification.txt"))
|
||||
assert os.path.isfile(os.path.join(datastore_path, "test-tag.txt"))
|
||||
assert os.path.isfile(os.path.join(datastore_path, "other-tag.txt"))
|
||||
|
||||
# @todo assert the group name or other unique body is in other-tag.txt
|
||||
# Verify what was sent as a notification, this file should exist
|
||||
with open(os.path.join(datastore_path, "notification.txt"), "r") as f:
|
||||
with open(os.path.join(datastore_path, "test-tag.txt"), "r") as f:
|
||||
notification_submission = f.read()
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
os.unlink(os.path.join(datastore_path, "test-tag.txt"))
|
||||
|
||||
# Did we see the URL that had a change, in the notification?
|
||||
# Diff was correctly executed
|
||||
|
||||
@@ -624,3 +624,76 @@ def test_session_locale_overrides_accept_language(client, live_server, measure_m
|
||||
assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
|
||||
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
|
||||
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
|
||||
|
||||
|
||||
def test_clear_history_translated_confirmation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that clearing snapshot history works with translated confirmation text.
|
||||
|
||||
Issue #3865: When the app language is set to German, the clear history
|
||||
confirmation dialog shows the translated word (e.g. 'loschen') but the
|
||||
backend only accepted the English word 'clear', making it impossible
|
||||
to clear snapshots in non-English languages.
|
||||
"""
|
||||
from flask import url_for
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Add a watch so there is history to clear
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Set language to German
|
||||
res = client.get(
|
||||
url_for("set_language", locale="de"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
|
||||
# Verify the clear history page shows the German confirmation word
|
||||
res = client.get(
|
||||
url_for("ui.clear_all_history"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert "löschen".encode() in res.data, "Expected German word 'loschen' on clear history page"
|
||||
|
||||
# Submit the form with the German translated word
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={"confirmtext": "löschen"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
# Should NOT show error message
|
||||
assert b"Incorrect confirmation text" not in res.data, \
|
||||
"German confirmation word 'loschen' should be accepted (issue #3865)"
|
||||
|
||||
# Switch back to English and verify English word still works
|
||||
res = client.get(
|
||||
url_for("set_language", locale="en_US"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={"confirmtext": "clear"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert b"Incorrect confirmation text" not in res.data, \
|
||||
"English confirmation word 'clear' should still be accepted"
|
||||
|
||||
# Verify that missing/empty confirmtext does not crash the server
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200, \
|
||||
"Missing confirmtext should not crash the server"
|
||||
|
||||
@@ -1,260 +0,0 @@
|
||||
"""
|
||||
Tests for LLM summary queue, worker, and regenerate route.
|
||||
|
||||
Mocking strategy
|
||||
----------------
|
||||
- `_call_llm` is patched at the module level so no real LiteLLM/API calls are made.
|
||||
- `_write_summary` is left un-patched so we can assert the file was actually written.
|
||||
- `process_llm_summary` is called directly in unit tests (no worker thread needed).
|
||||
"""
|
||||
|
||||
import os
|
||||
import queue
|
||||
import time
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
from flask import url_for
|
||||
|
||||
from changedetectionio.tests.util import set_original_response, set_modified_response, wait_for_all_checks
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unit tests — process_llm_summary directly, no HTTP, no worker thread
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestProcessLlmSummary:
|
||||
|
||||
def _make_watch_with_two_snapshots(self, client, datastore_path):
|
||||
"""Helper: returns (datastore, uuid, snapshot_id) with 2 history entries."""
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
datastore = client.application.config['DATASTORE']
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
uuid = datastore.add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
history_keys = list(watch.history.keys())
|
||||
snapshot_id = os.path.basename(watch.history[history_keys[1]]).split('.')[0]
|
||||
return datastore, uuid, snapshot_id
|
||||
|
||||
def test_writes_summary_file(self, client, live_server, datastore_path):
|
||||
"""process_llm_summary writes {snapshot_id}-llm.txt when _call_llm succeeds."""
|
||||
datastore, uuid, snapshot_id = self._make_watch_with_two_snapshots(client, datastore_path)
|
||||
watch = datastore.data['watching'][uuid]
|
||||
item = {'uuid': uuid, 'snapshot_id': snapshot_id, 'attempts': 0}
|
||||
|
||||
from changedetectionio.llm.queue_worker import process_llm_summary
|
||||
with patch('changedetectionio.llm.queue_worker._call_llm', return_value='Price dropped from $10 to $8.') as mock_llm:
|
||||
process_llm_summary(item, datastore)
|
||||
|
||||
assert mock_llm.called
|
||||
summary_path = os.path.join(watch.data_dir, f"{snapshot_id}-llm.txt")
|
||||
assert os.path.exists(summary_path), "Summary file was not written"
|
||||
assert open(summary_path).read() == 'Price dropped from $10 to $8.'
|
||||
|
||||
def test_call_llm_uses_temperature_zero_and_seed(self, client, live_server, datastore_path):
|
||||
"""_call_llm always passes temperature=0 and seed=0 to litellm for determinism."""
|
||||
import litellm
|
||||
from changedetectionio.llm.queue_worker import _call_llm
|
||||
|
||||
messages = [{'role': 'user', 'content': 'hello'}]
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices[0].message.content = 'ok'
|
||||
|
||||
with patch('litellm.completion', return_value=mock_response) as mock_completion:
|
||||
_call_llm(model='gpt-4o-mini', messages=messages)
|
||||
|
||||
call_kwargs = mock_completion.call_args.kwargs
|
||||
assert call_kwargs['temperature'] == 0, "temperature must be 0"
|
||||
assert call_kwargs['seed'] == 0, "seed must be 0 for reproducibility"
|
||||
assert 'top_p' not in call_kwargs, "top_p must not be set (redundant at temp=0)"
|
||||
assert 'frequency_penalty' not in call_kwargs, "frequency_penalty must not be set"
|
||||
assert 'presence_penalty' not in call_kwargs, "presence_penalty must not be set"
|
||||
|
||||
def test_skips_first_history_entry(self, client, live_server, datastore_path):
|
||||
"""process_llm_summary raises ValueError for the first history entry (no prior to diff)."""
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
datastore = client.application.config['DATASTORE']
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
uuid = datastore.add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
history_keys = list(watch.history.keys())
|
||||
first_fname = watch.history[history_keys[0]]
|
||||
snapshot_id = os.path.basename(first_fname).split('.')[0]
|
||||
|
||||
item = {'uuid': uuid, 'snapshot_id': snapshot_id, 'attempts': 0}
|
||||
|
||||
from changedetectionio.llm.queue_worker import process_llm_summary
|
||||
with pytest.raises(ValueError, match="first history entry"):
|
||||
process_llm_summary(item, datastore)
|
||||
|
||||
def test_raises_for_unknown_watch(self, client, live_server, datastore_path):
    """A queue item whose watch UUID is not in the datastore is rejected.

    process_llm_summary() is expected to raise a ValueError whose message
    matches "not found".
    """
    from changedetectionio.llm.queue_worker import process_llm_summary

    store = client.application.config['DATASTORE']
    orphan_item = {
        'uuid': 'does-not-exist',
        'snapshot_id': 'abc123',
        'attempts': 0,
    }

    with pytest.raises(ValueError, match="not found"):
        process_llm_summary(orphan_item, store)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unit tests — worker retry logic, no HTTP
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestWorkerRetry:
    """Retry / drop bookkeeping for LLM summary queue items.

    Neither test starts the runner thread. Each one pulls a single item off a
    local queue, calls the patched (always failing) process_llm_summary, and
    performs the retry handling inline in its own ``except`` branch.
    """

    def test_requeues_on_failure_with_backoff(self, client, live_server, datastore_path):
        """Worker re-queues a failed item with incremented attempts and future next_retry_at."""
        from changedetectionio.llm import queue_worker
        from changedetectionio.llm.queue_worker import RETRY_BACKOFF_BASE_SECONDS

        llm_q = queue.Queue()
        datastore = client.application.config['DATASTORE']

        item = {'uuid': 'fake-uuid', 'snapshot_id': 'abc123', 'attempts': 0}
        llm_q.put(item)

        with patch('changedetectionio.llm.queue_worker.process_llm_summary', side_effect=RuntimeError("API down")):
            # Run one iteration manually (don't start the full runner thread)
            got = llm_q.get(block=False)
            try:
                queue_worker.process_llm_summary(got, datastore)
            except Exception:
                got['attempts'] += 1
                # Back-off doubles with every attempt: base * 2 ** (attempts - 1)
                got['next_retry_at'] = time.time() + RETRY_BACKOFF_BASE_SECONDS * (2 ** (got['attempts'] - 1))
                llm_q.put(got)

        assert llm_q.qsize() == 1
        requeued = llm_q.get_nowait()
        assert requeued['attempts'] == 1
        assert requeued['next_retry_at'] > time.time()

    def test_drops_after_max_retries(self, client, live_server, datastore_path):
        """Worker drops item and records last_error after MAX_RETRIES exhausted."""
        set_original_response(datastore_path=datastore_path)
        datastore = client.application.config['DATASTORE']
        test_url = url_for('test_endpoint', _external=True)
        uuid = datastore.add_watch(url=test_url)

        from changedetectionio.llm.queue_worker import MAX_RETRIES
        # Start with the retry budget already used up.
        item = {'uuid': uuid, 'snapshot_id': 'abc123', 'attempts': MAX_RETRIES}

        llm_q = queue.Queue()
        llm_q.put(item)

        with patch('changedetectionio.llm.queue_worker.process_llm_summary', side_effect=RuntimeError("still down")):
            from changedetectionio.llm import queue_worker
            got = llm_q.get(block=False)
            try:
                queue_worker.process_llm_summary(got, datastore)
            except Exception as e:
                if got['attempts'] < MAX_RETRIES:
                    llm_q.put(got)
                else:
                    # Budget exhausted: do not re-queue, store the error on the watch.
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

        # Queue should be empty — item was dropped
        assert llm_q.empty()
        watch = datastore.data['watching'][uuid]
        assert 'still down' in (watch.get('last_error') or '')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Route tests — GET /edit/<uuid>/regenerate-llm-summaries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRegenerateLlmSummariesRoute:
    """Tests for the 'ui.ui_edit.watch_regenerate_llm_summaries' route."""

    def test_queues_missing_summaries(self, client, live_server, datastore_path):
        """Every history entry after the first gets one queue item when it has no summary yet."""
        set_original_response(datastore_path=datastore_path)
        store = client.application.config['DATASTORE']
        endpoint_url = url_for('test_endpoint', _external=True)

        watch_uuid = store.add_watch(url=endpoint_url)
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

        # Change the served content so a second check adds another history entry.
        set_modified_response(datastore_path=datastore_path)
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

        watch = store.data['watching'][watch_uuid]
        assert watch.history_n >= 2

        from changedetectionio.flask_app import llm_summary_q

        response = client.get(
            url_for('ui.ui_edit.watch_regenerate_llm_summaries', uuid=watch_uuid),
            follow_redirects=True,
        )
        assert response.status_code == 200

        # The first entry has no predecessor to diff against, so it is never queued.
        expected_count = watch.history_n - 1
        assert llm_summary_q.qsize() == expected_count

        # Drain the queue completely before inspecting individual items.
        queued = []
        while not llm_summary_q.empty():
            queued.append(llm_summary_q.get_nowait())

        for queued_item in queued:
            assert queued_item['uuid'] == watch_uuid
            assert queued_item['attempts'] == 0
            assert len(queued_item['snapshot_id']) == 32  # MD5 hex

    def test_skips_already_summarised_entries(self, client, live_server, datastore_path):
        """An entry that already has a {snapshot_id}-llm.txt file is not queued again."""
        set_original_response(datastore_path=datastore_path)
        store = client.application.config['DATASTORE']
        endpoint_url = url_for('test_endpoint', _external=True)

        watch_uuid = store.add_watch(url=endpoint_url)
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

        set_modified_response(datastore_path=datastore_path)
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

        watch = store.data['watching'][watch_uuid]
        second_key = list(watch.history.keys())[1]
        second_snapshot_file = os.path.basename(watch.history[second_key])
        second_snapshot_id = second_snapshot_file.partition('.')[0]

        # Put a summary file in place for the second entry before calling the route.
        existing_summary = os.path.join(watch.data_dir, f"{second_snapshot_id}-llm.txt")
        with open(existing_summary, 'w') as handle:
            handle.write('already done')

        from changedetectionio.flask_app import llm_summary_q

        client.get(
            url_for('ui.ui_edit.watch_regenerate_llm_summaries', uuid=watch_uuid),
            follow_redirects=True,
        )

        # The pre-summarised entry must be skipped, leaving nothing in the queue.
        assert llm_summary_q.empty()

    def test_404_for_unknown_watch(self, client, live_server, datastore_path):
        """A UUID that matches no watch yields HTTP 404 (redirects are not followed)."""
        response = client.get(
            url_for('ui.ui_edit.watch_regenerate_llm_summaries', uuid='does-not-exist'),
            follow_redirects=False,
        )
        assert response.status_code == 404
|
||||
@@ -108,6 +108,7 @@ def test_check_notification(client, live_server, measure_memory_usage, datastore
|
||||
"Diff Added: {{diff_added}}\n"
|
||||
"Diff Removed: {{diff_removed}}\n"
|
||||
"Diff Full: {{diff_full}}\n"
|
||||
"Diff with args: {{diff(context=3)}}"
|
||||
"Diff as Patch: {{diff_patch}}\n"
|
||||
"Change datetime: {{change_datetime}}\n"
|
||||
"Change datetime format: Weekday {{change_datetime(format='%A')}}\n"
|
||||
|
||||
@@ -109,7 +109,7 @@ def test_itemprop_price_change(client, live_server, measure_memory_usage, datast
|
||||
set_original_response(props_markup=instock_props[0], price='120.45', datastore_path=datastore_path)
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
data={"restock_settings-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
|
||||
data={"processor_config_restock_diff-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
@@ -204,9 +204,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form, datastore_path):
|
||||
def test_restock_itemprop_minmax(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
extras = {
|
||||
"restock_settings-follow_price_changes": "y",
|
||||
"restock_settings-price_change_min": 900.0,
|
||||
"restock_settings-price_change_max": 1100.10
|
||||
"processor_config_restock_diff-follow_price_changes": "y",
|
||||
"processor_config_restock_diff-price_change_min": 900.0,
|
||||
"processor_config_restock_diff-price_change_max": 1100.10
|
||||
}
|
||||
_run_test_minmax_limit(client, extra_watch_edit_form=extras, datastore_path=datastore_path)
|
||||
|
||||
@@ -223,9 +223,9 @@ def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage, da
|
||||
res = client.post(
|
||||
url_for("tags.form_tag_edit_submit", uuid="first"),
|
||||
data={"name": "test-tag",
|
||||
"restock_settings-follow_price_changes": "y",
|
||||
"restock_settings-price_change_min": 900.0,
|
||||
"restock_settings-price_change_max": 1100.10,
|
||||
"processor_config_restock_diff-follow_price_changes": "y",
|
||||
"processor_config_restock_diff-price_change_min": 900.0,
|
||||
"processor_config_restock_diff-price_change_max": 1100.10,
|
||||
"overrides_watch": "y", #overrides_watch should be restock_overrides_watch
|
||||
},
|
||||
follow_redirects=True
|
||||
@@ -258,8 +258,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
data={"restock_settings-follow_price_changes": "y",
|
||||
"restock_settings-price_change_threshold_percent": 5.0,
|
||||
data={"processor_config_restock_diff-follow_price_changes": "y",
|
||||
"processor_config_restock_diff-price_change_threshold_percent": 5.0,
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
"headers": "",
|
||||
@@ -305,8 +305,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={"restock_settings-follow_price_changes": "y",
|
||||
"restock_settings-price_change_threshold_percent": 5.05,
|
||||
data={"processor_config_restock_diff-follow_price_changes": "y",
|
||||
"processor_config_restock_diff-price_change_threshold_percent": 5.05,
|
||||
"processor": "text_json_diff",
|
||||
"url": test_url,
|
||||
'fetch_backend': "html_requests",
|
||||
@@ -467,3 +467,38 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
|
||||
assert b'155.55' in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_itemprop_as_str(client, live_server, measure_memory_usage, datastore_path):
    """A price given as a string in a <meta itemprop="price"> tag is shown in the watch list.

    Writes a page containing a schema.org Offer (price "767.55", EUR, InStock)
    to the test endpoint, adds it as a 'restock_diff' watch, runs a check and
    asserts the price appears on the watchlist page.
    """
    # Plain string: there are no placeholders, so no f-prefix is needed.
    test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<span itemprop="offers" itemscope itemtype="http://schema.org/Offer">
<meta content="767.55" itemprop="price"/>
<meta content="EUR" itemprop="priceCurrency"/>
<meta content="InStock" itemprop="availability"/>
<meta content="https://www.123-test.dk" itemprop="url"/>
</span>
</body>
</html>
"""

    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
        f.write(test_return_data)

    test_url = url_for('test_endpoint', _external=True)

    client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    client.get(url_for("ui.form_watch_checknow"))
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    assert b'767.55' in res.data

    # Clean up like the preceding test in this file, so the watch does not leak into later tests.
    delete_all_watches(client)
|
||||
@@ -34,6 +34,7 @@ def test_favicon(client, live_server, measure_memory_usage, datastore_path):
|
||||
favicon_base_64=SVG_BASE64
|
||||
)
|
||||
|
||||
|
||||
res = client.get(url_for('static_content', group='favicon', filename=uuid))
|
||||
assert res.status_code == 200
|
||||
assert len(res.data) > 10
|
||||
@@ -583,13 +584,16 @@ def test_static_directory_traversal(client, live_server, measure_memory_usage, d
|
||||
|
||||
def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
SSRF protection: IANA-reserved/private IP addresses must be blocked by default.
|
||||
SSRF protection: IANA-reserved/private IP addresses are blocked at fetch-time, not add-time.
|
||||
|
||||
Watches targeting private/reserved IPs can be *added* freely; the block happens when the
|
||||
fetcher actually tries to reach the URL (via validate_iana_url() in call_browser()).
|
||||
|
||||
Covers:
|
||||
1. is_private_hostname() correctly classifies all reserved ranges
|
||||
2. is_safe_valid_url() rejects private-IP URLs at add-time (env var off)
|
||||
3. is_safe_valid_url() allows private-IP URLs when ALLOW_IANA_RESTRICTED_ADDRESSES=true
|
||||
4. UI form rejects private-IP URLs and shows the standard error message
|
||||
2. is_safe_valid_url() ALLOWS private-IP URLs at add-time (IANA check moved to fetch-time)
|
||||
3. ALLOW_IANA_RESTRICTED_ADDRESSES has no effect on add-time; it only controls fetch-time
|
||||
4. UI form accepts private-IP URLs at add-time without error
|
||||
5. Requests fetcher blocks fetch-time DNS rebinding (fresh check on every fetch)
|
||||
6. Requests fetcher blocks redirects that lead to a private IP (open-redirect bypass)
|
||||
|
||||
@@ -601,8 +605,6 @@ def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memor
|
||||
from changedetectionio.validate_url import is_safe_valid_url, is_private_hostname
|
||||
|
||||
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')
|
||||
# Clear any URL results cached while the env var was 'true'
|
||||
is_safe_valid_url.cache_clear()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 1. is_private_hostname() — unit tests across all reserved ranges
|
||||
@@ -624,9 +626,10 @@ def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memor
|
||||
assert not is_private_hostname(host), f"{host} should be identified as public"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 2. is_safe_valid_url() blocks private-IP URLs (env var off)
|
||||
# 2. is_safe_valid_url() ALLOWS private-IP URLs at add-time
|
||||
# IANA check is no longer done here — it moved to fetch-time validate_iana_url()
|
||||
# ------------------------------------------------------------------
|
||||
blocked_urls = [
|
||||
private_ip_urls = [
|
||||
'http://127.0.0.1/',
|
||||
'http://10.0.0.1/',
|
||||
'http://172.16.0.1/',
|
||||
@@ -637,23 +640,24 @@ def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memor
|
||||
'http://[fc00::1]/',
|
||||
'http://[fe80::1]/',
|
||||
]
|
||||
for url in blocked_urls:
|
||||
assert not is_safe_valid_url(url), f"{url} should be blocked by is_safe_valid_url"
|
||||
for url in private_ip_urls:
|
||||
assert is_safe_valid_url(url), f"{url} should be allowed by is_safe_valid_url (IANA check is at fetch-time)"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 3. ALLOW_IANA_RESTRICTED_ADDRESSES=true bypasses the block
|
||||
# 3. ALLOW_IANA_RESTRICTED_ADDRESSES does not affect add-time validation
|
||||
# It only controls fetch-time blocking inside validate_iana_url()
|
||||
# ------------------------------------------------------------------
|
||||
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'true')
|
||||
is_safe_valid_url.cache_clear()
|
||||
assert is_safe_valid_url('http://127.0.0.1/'), \
|
||||
"Private IP should be allowed when ALLOW_IANA_RESTRICTED_ADDRESSES=true"
|
||||
"Private IP should be allowed at add-time regardless of ALLOW_IANA_RESTRICTED_ADDRESSES"
|
||||
|
||||
# Restore the block for the remaining assertions
|
||||
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')
|
||||
is_safe_valid_url.cache_clear()
|
||||
assert is_safe_valid_url('http://127.0.0.1/'), \
|
||||
"Private IP should be allowed at add-time regardless of ALLOW_IANA_RESTRICTED_ADDRESSES"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 4. UI form rejects private-IP URLs
|
||||
# 4. UI form accepts private-IP URLs at add-time
|
||||
# The watch is created; the SSRF block fires later at fetch-time
|
||||
# ------------------------------------------------------------------
|
||||
for url in ['http://127.0.0.1/', 'http://169.254.169.254/latest/meta-data/']:
|
||||
res = client.post(
|
||||
@@ -661,8 +665,8 @@ def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memor
|
||||
data={'url': url, 'tags': ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'Watch protocol is not permitted or invalid URL format' in res.data, \
|
||||
f"UI should reject {url}"
|
||||
assert b'Watch protocol is not permitted or invalid URL format' not in res.data, \
|
||||
f"UI should accept {url} at add-time (SSRF is blocked at fetch-time)"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 5. Fetch-time DNS-rebinding check in the requests fetcher
|
||||
@@ -708,3 +712,35 @@ def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memor
|
||||
request_body=None,
|
||||
request_method='GET',
|
||||
)
|
||||
|
||||
|
||||
def test_unresolvable_hostname_is_allowed(client, live_server, monkeypatch):
    """
    A hostname that fails DNS resolution must still be accepted at add-time,
    even with ALLOW_IANA_RESTRICTED_ADDRESSES=false.

    A lookup failure (gaierror) says nothing about the host being private: the
    domain may be offline or not yet live, so rejecting it would be a false
    positive. DNS-rebinding protection is applied at fetch-time in call_browser().
    """
    from changedetectionio.validate_url import is_safe_valid_url

    monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')

    dead_url = 'http://this-host-does-not-exist-xyz987.invalid/some/path'

    # Validator level: the URL is accepted although the host cannot be resolved.
    assert is_safe_valid_url(dead_url), (
        "Unresolvable hostname should pass is_safe_valid_url — DNS failure is not a private-IP signal"
    )

    # UI level: the quick-add form must not show the validation error.
    add_response = client.post(
        url_for('ui.ui_views.form_quick_watch_add'),
        data={'url': dead_url, 'tags': ''},
        follow_redirects=True
    )
    assert b'Watch protocol is not permitted or invalid URL format' not in add_response.data, (
        "UI should not reject a URL just because its hostname is unresolvable"
    )

    # ...and the new watch is listed on the overview page.
    overview = client.get(url_for('watchlist.index'))
    assert b'this-host-does-not-exist-xyz987.invalid' in overview.data, (
        "Unresolvable hostname watch should appear in the watch overview list"
    )
|
||||
|
||||
@@ -592,3 +592,74 @@ def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
|
||||
set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
|
||||
for content_type in RSS_XML_CONTENT_TYPES:
|
||||
_subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)
|
||||
|
||||
|
||||
# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends
|
||||
# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions.
|
||||
def test_xpath_blocked_functions_unit():
    """Dangerous XPath 3.0 functions must be rejected at the parser level (no live server needed).

    Each expression is checked twice: through xpath_filter() and through
    elementpath.select() with SafeXPath3Parser. Both must raise
    elementpath.ElementPathError. A plain '//p/text()' query is run last to
    confirm ordinary XPath still works.

    Failures are signalled with an explicit ``raise AssertionError`` rather
    than ``assert False``, so the negative checks still fail when Python runs
    with -O (which strips assert statements).
    """
    import elementpath
    from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser
    from lxml import html

    html_content = '<html><body><p>safe content</p></body></html>'

    dangerous_expressions = [
        "unparsed-text('file:///etc/passwd')",
        "unparsed-text-lines('file:///etc/passwd')",
        "unparsed-text-available('file:///etc/passwd')",
        "doc('file:///etc/passwd')",
        "doc-available('file:///etc/passwd')",
        "environment-variable('PATH')",
        "available-environment-variables()",
    ]

    for expr in dangerous_expressions:
        # xpath_filter() must raise, not silently return file contents
        try:
            result = xpath_filter(expr, html_content)
        except elementpath.ElementPathError:
            pass  # expected
        else:
            raise AssertionError(f"xpath_filter should have raised for: {expr!r}, got: {result!r}")

        # SafeXPath3Parser must reject the expression at parse time
        tree = html.fromstring(html_content)
        try:
            elementpath.select(tree, expr, parser=SafeXPath3Parser)
        except elementpath.ElementPathError:
            pass  # expected
        else:
            raise AssertionError(f"SafeXPath3Parser should have raised for: {expr!r}")

    # Sanity check: normal XPath still works
    result = xpath_filter('//p/text()', html_content)
    assert result == 'safe content'
|
||||
|
||||
|
||||
# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions.
|
||||
def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path):
    """The watch edit form must refuse dangerous XPath 3.0 functions instead of storing them.

    Adds a watch, runs one check, then submits each dangerous "xpath:..."
    filter through the edit page and expects the "is not a valid XPath
    expression" message in the response every time.
    """
    from flask import url_for

    set_original_response(datastore_path=datastore_path)
    endpoint_url = url_for('test_endpoint', _external=True)
    client.application.config.get('DATASTORE').add_watch(url=endpoint_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    rejected_filters = [
        "xpath:unparsed-text('file:///etc/passwd')",
        "xpath:environment-variable('PATH')",
        "xpath:doc('file:///etc/passwd')",
    ]

    for bad_filter in rejected_filters:
        form_data = {
            "include_filters": bad_filter,
            "url": endpoint_url,
            "tags": "",
            "headers": "",
            'fetch_backend': "html_requests",
            "time_between_check_use_default": "y",
        }
        response = client.post(
            url_for("ui.ui_edit.edit_page", uuid="first"),
            data=form_data,
            follow_redirects=True
        )
        assert b"is not a valid XPath expression" in response.data, \
            f"Form should reject dangerous expression: {bad_filter!r}"

    delete_all_watches(client)
|
||||
|
||||
@@ -343,8 +343,11 @@ def new_live_server_setup(live_server):
|
||||
@live_server.app.route('/test_notification_endpoint', methods=['POST', 'GET'])
|
||||
def test_notification_endpoint():
|
||||
datastore_path = current_app.config.get('TEST_DATASTORE_PATH', 'test-datastore')
|
||||
|
||||
with open(os.path.join(datastore_path, "notification.txt"), "wb") as f:
|
||||
from loguru import logger
|
||||
# @todo make safe
|
||||
fname = request.args.get('outputfilename', "notification.txt")
|
||||
logger.debug(f"Writing test notification endpoint data to '{fname}' - {request.args}")
|
||||
with open(os.path.join(datastore_path, fname), "wb") as f:
|
||||
# Debug method, dump all POST to file also, used to prove #65
|
||||
data = request.stream.read()
|
||||
if data != None:
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1978,7 +1978,7 @@ msgstr "Format d'heure invalide. Utilisez HH:MM."
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Not a valid timezone name"
|
||||
msgstr "Ce n'est pas un nom de fuseau horaire valide"
|
||||
msgstr "Nom de fuseau horaire invalide"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "not set"
|
||||
@@ -2054,9 +2054,7 @@ msgstr "secondes"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Notification Body and Title is required when a Notification URL is used"
|
||||
msgstr ""
|
||||
"Le corps et le titre de la notification sont requis lorsqu'une URL de notification est utiliséeLe corps et le titre "
|
||||
"de la notification sont requis lorsqu'une URL de notification est utilisée"
|
||||
msgstr "Le corps et le titre de la notification sont requis lorsqu'une URL de notification est utilisée"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
#, python-format
|
||||
@@ -2185,11 +2183,11 @@ msgstr "Utilisez les paramètres globaux pour le temps entre la vérification et
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "CSS/JSONPath/JQ/XPath Filters"
|
||||
msgstr "Filtre CSS/xPath"
|
||||
msgstr "Filtre CSS/JSONPath/JQ/XPath"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Remove elements"
|
||||
msgstr "Sélectionner par élément"
|
||||
msgstr "Supprimer par élément"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Extract text"
|
||||
@@ -2337,7 +2335,7 @@ msgstr "URL du proxy"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Proxy URLs must start with http://, https:// or socks5://"
|
||||
msgstr "Les URL proxy doivent commencer par http://, https:// ou chaussettes5://"
|
||||
msgstr "Les URL proxy doivent commencer par http://, https:// ou socks5://"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Browser connection URL"
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
@@ -61,7 +61,9 @@ def normalize_url_encoding(url):
|
||||
def is_private_hostname(hostname):
|
||||
"""Return True if hostname resolves to an IANA-restricted (private/reserved) IP address.
|
||||
|
||||
Fails closed: unresolvable hostnames return True (block them).
|
||||
Unresolvable hostnames return False (allow them) — DNS may be temporarily unavailable
|
||||
or the domain not yet live. The actual DNS rebinding attack is mitigated by fetch-time
|
||||
re-validation in requests.py, not by blocking unresolvable domains at add-time.
|
||||
Never cached — callers that need fresh DNS resolution (e.g. at fetch time) can call
|
||||
this directly without going through the lru_cached is_safe_valid_url().
|
||||
"""
|
||||
@@ -69,13 +71,15 @@ def is_private_hostname(hostname):
|
||||
for info in socket.getaddrinfo(hostname, None):
|
||||
ip = ipaddress.ip_address(info[4][0])
|
||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
||||
logger.warning(f"Hostname '{hostname} - {ip} - ip.is_private = {ip.is_private}, ip.is_loopback = {ip.is_loopback}, ip.is_link_local = {ip.is_link_local}, ip.is_reserved = {ip.is_reserved}")
|
||||
return True
|
||||
except socket.gaierror:
|
||||
return True
|
||||
except socket.gaierror as e:
|
||||
logger.warning(f"{hostname} error checking {str(e)}")
|
||||
return False
|
||||
logger.info(f"Hostname '{hostname}' is NOT private/IANA restricted.")
|
||||
return False
|
||||
|
||||
|
||||
@lru_cache(maxsize=10000)
|
||||
def is_safe_valid_url(test_url):
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
@@ -96,6 +100,19 @@ def is_safe_valid_url(test_url):
|
||||
logger.warning('URL validation failed: URL is empty or whitespace only')
|
||||
return False
|
||||
|
||||
# Per-request cache: same URL is often validated 2-3x per watchlist render (sort + display).
|
||||
# Flask's g is scoped to one request and auto-cleared on teardown, so dynamic Jinja2 URLs
|
||||
# like {{microtime()}} are always re-evaluated on the next request.
|
||||
# Falls back gracefully when called outside a request context (e.g. background workers).
|
||||
_cache_key = test_url
|
||||
try:
|
||||
from flask import g
|
||||
_cache = g.setdefault('_url_validation_cache', {})
|
||||
if _cache_key in _cache:
|
||||
return _cache[_cache_key]
|
||||
except RuntimeError:
|
||||
_cache = None # No app context
|
||||
|
||||
allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false'))
|
||||
safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):'
|
||||
|
||||
@@ -108,11 +125,14 @@ def is_safe_valid_url(test_url):
|
||||
test_url = r.sub('', test_url)
|
||||
|
||||
# Check the actual rendered URL in case of any Jinja markup
|
||||
try:
|
||||
test_url = jinja_render(test_url)
|
||||
except Exception as e:
|
||||
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
|
||||
return False
|
||||
# Only run jinja_render when the URL actually contains Jinja2 syntax - creating a new
|
||||
# ImmutableSandboxedEnvironment is expensive and is called once per watch per page load
|
||||
if '{%' in test_url or '{{' in test_url:
|
||||
try:
|
||||
test_url = jinja_render(test_url)
|
||||
except Exception as e:
|
||||
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
|
||||
return False
|
||||
|
||||
# Check query parameters and fragment
|
||||
if re.search(r'[<>]', test_url):
|
||||
@@ -138,12 +158,6 @@ def is_safe_valid_url(test_url):
|
||||
logger.warning(f'URL f"{test_url}" failed validation, aborting.')
|
||||
return False
|
||||
|
||||
# Block IANA-restricted (private/reserved) IP addresses unless explicitly allowed.
|
||||
# This is an add-time check; fetch-time re-validation in requests.py handles DNS rebinding.
|
||||
if not strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')):
|
||||
parsed = urlparse(test_url)
|
||||
if parsed.hostname and is_private_hostname(parsed.hostname):
|
||||
logger.warning(f'URL "{test_url}" resolves to a private/reserved IP address, aborting.')
|
||||
return False
|
||||
|
||||
if _cache is not None:
|
||||
_cache[_cache_key] = True
|
||||
return True
|
||||
|
||||
@@ -518,8 +518,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# (cleanup may delete these variables, but plugins need the original references)
|
||||
finalize_handler = update_handler # Capture now, before cleanup deletes it
|
||||
finalize_watch = watch # Capture now, before any modifications
|
||||
finalize_changed_detected = locals().get('changed_detected', False)
|
||||
finalize_snapshot_id = (locals().get('update_obj') or {}).get('previous_md5') or ''
|
||||
|
||||
# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
|
||||
try:
|
||||
@@ -560,9 +558,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
update_handler=finalize_handler,
|
||||
watch=finalize_watch,
|
||||
datastore=datastore,
|
||||
processing_exception=processing_exception,
|
||||
changed_detected=finalize_changed_detected,
|
||||
snapshot_id=finalize_snapshot_id,
|
||||
processing_exception=processing_exception
|
||||
)
|
||||
except Exception as finalize_error:
|
||||
logger.error(f"Worker {worker_id} error in finalize hook: {finalize_error}")
|
||||
|
||||
+199
-2
@@ -108,9 +108,162 @@ tags:
|
||||
|
||||
- name: System Information
|
||||
description: |
|
||||
Retrieve system status and statistics about your changedetection.io instance, including total watch
|
||||
Retrieve system status and statistics about your changedetection.io instance, including total watch
|
||||
counts, uptime information, and version details.
|
||||
|
||||
- name: Plugin API Extensions
|
||||
description: |
|
||||
## How Processor Plugins Extend the API
|
||||
|
||||
changedetection.io uses a **processor plugin** system to handle different types of change detection.
|
||||
Each processor lives in `changedetectionio/processors/<name>/` and may include an `api.yaml` file
|
||||
that extends the core Watch schema with processor-specific configuration fields.
|
||||
|
||||
### How it works
|
||||
|
||||
At startup, changedetection.io scans all installed processors for an `api.yaml` file. Any schemas
|
||||
and code samples defined there are deep-merged into the live API specification, making the
|
||||
processor's configuration fields valid on all watch create and update requests.
|
||||
|
||||
The live, fully-merged spec is always available at `/api/v1/full-spec` — use that URL with
|
||||
Swagger UI or Redoc to see the complete schema for your specific installation.
|
||||
|
||||
---
|
||||
|
||||
### Writing a processor `api.yaml`
|
||||
|
||||
Place an `api.yaml` in the processor plugin's own directory, alongside its `__init__.py`
|
||||
(e.g. `changedetectionio/processors/my_processor/api.yaml`). The schema name **must** follow the
|
||||
convention `processor_config_<processor_name>` (e.g. `processor_config_restock_diff`). That same
|
||||
key is used as the JSON field name when creating or updating a watch.
|
||||
|
||||
A minimal `api.yaml` for a hypothetical `my_processor`:
|
||||
|
||||
```yaml
|
||||
components:
|
||||
schemas:
|
||||
processor_config_my_processor:
|
||||
type: object
|
||||
description: Configuration for my_processor
|
||||
properties:
|
||||
some_option:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Enable some behaviour
|
||||
|
||||
paths:
|
||||
/watch:
|
||||
post:
|
||||
x-code-samples:
|
||||
- lang: curl
|
||||
label: my_processor example
|
||||
source: |
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com",
|
||||
"processor": "my_processor",
|
||||
"processor_config_my_processor": { "some_option": true }
|
||||
}'
|
||||
```
|
||||
|
||||
The `paths` section in `api.yaml` is used only for injecting additional `x-code-samples` into
|
||||
existing endpoints — you cannot define new routes via a plugin.
|
||||
|
||||
---
|
||||
|
||||
### Built-in plugin: `restock_diff`
|
||||
|
||||
The `restock_diff` processor is always shipped with changedetection.io. It monitors product
|
||||
availability and price changes using structured data (JSON-LD / schema.org microdata) and
|
||||
text heuristics. It is activated by setting `"processor": "restock_diff"` on a watch.
|
||||
|
||||
It adds the `processor_config_restock_diff` block to the Watch schema with these fields:
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `in_stock_processing` | string | `in_stock_only` | `in_stock_only` — alert only on Out-of-Stock→In-Stock transitions · `all_changes` — alert on any availability change · `off` — disable stock tracking |
|
||||
| `follow_price_changes` | boolean | `true` | Monitor and alert on price changes |
|
||||
| `price_change_min` | number\|null | — | Alert when price drops **below** this value |
|
||||
| `price_change_max` | number\|null | — | Alert when price rises **above** this value |
|
||||
| `price_change_threshold_percent` | number\|null | — | Minimum % change since the original price to trigger an alert |
|
||||
|
||||
#### CREATE — Add a restock/price monitor
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/product/widget",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
#### READ — Retrieve the monitor
|
||||
|
||||
The response JSON includes `processor_config_restock_diff` alongside all standard watch fields:
|
||||
|
||||
```bash
|
||||
curl -X GET "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY"
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"uuid": "cc0cfffa-f449-477b-83ea-0caafd1dc091",
|
||||
"url": "https://example.com/product/widget",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5,
|
||||
"price_change_min": null,
|
||||
"price_change_max": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### UPDATE — Change thresholds without recreating the monitor
|
||||
|
||||
Only fields included in the request body are updated; omitted fields are left unchanged.
|
||||
|
||||
```bash
|
||||
curl -X PUT "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": true,
|
||||
"price_change_min": 10.00,
|
||||
"price_change_max": 500.00
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
#### DELETE — Remove the monitor
|
||||
|
||||
```bash
|
||||
curl -X DELETE "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
For the complete schema-validated documentation including all processor fields, fetch the live spec
|
||||
and load it into Swagger UI or Redoc:
|
||||
|
||||
```
|
||||
GET /api/v1/full-spec
|
||||
```
|
||||
|
||||
components:
|
||||
securitySchemes:
|
||||
ApiKeyAuth:
|
||||
@@ -1889,7 +2042,7 @@ paths:
|
||||
- lang: 'Python'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
|
||||
headers = {'x-api-key': 'YOUR_API_KEY'}
|
||||
response = requests.get('http://localhost:5000/api/v1/systeminfo', headers=headers)
|
||||
print(response.json())
|
||||
@@ -1905,3 +2058,47 @@ paths:
|
||||
tag_count: 5
|
||||
uptime: "2 days, 3:45:12"
|
||||
version: "0.50.10"
|
||||
|
||||
/full-spec:
|
||||
get:
|
||||
operationId: getFullApiSpec
|
||||
tags: [Plugin API Extensions]
|
||||
summary: Get full live API spec
|
||||
description: |
|
||||
Return the fully merged OpenAPI specification for this instance.
|
||||
|
||||
Unlike the static `api-spec.yaml` shipped with the application, this endpoint returns the
|
||||
spec dynamically merged with any `api.yaml` schemas provided by installed processor plugins.
|
||||
|
||||
**Use this URL** with Swagger UI or Redoc to get schema-accurate documentation for your
|
||||
specific install — it includes every `processor_config_<name>` schema block contributed by
|
||||
installed processors (e.g. `processor_config_restock_diff` from the built-in restock plugin).
|
||||
|
||||
This endpoint requires no authentication and returns YAML.
|
||||
|
||||
To load it directly in Swagger UI, paste the URL into the "Explore" box:
|
||||
```
|
||||
http://localhost:5000/api/v1/full-spec
|
||||
```
|
||||
security: []
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
source: |
|
||||
# Fetch the live merged spec (no API key needed)
|
||||
curl -X GET "http://localhost:5000/api/v1/full-spec"
|
||||
- lang: 'Python'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
# No authentication required
|
||||
response = requests.get('http://localhost:5000/api/v1/full-spec')
|
||||
print(response.text) # Returns YAML
|
||||
responses:
|
||||
'200':
|
||||
description: |
|
||||
Merged OpenAPI specification in YAML format. Includes all processor plugin schemas
|
||||
(e.g. `processor_config_restock_diff`) not present in the static `api-spec.yaml`.
|
||||
content:
|
||||
application/yaml:
|
||||
schema:
|
||||
type: string
|
||||
|
||||
+353
-9
File diff suppressed because one or more lines are too long
+1
-8
@@ -28,7 +28,7 @@ requests-file
|
||||
chardet>2.3.0
|
||||
|
||||
wtforms~=3.2
|
||||
jsonpath-ng~=1.7.0
|
||||
jsonpath-ng~=1.8.0
|
||||
|
||||
# Fast JSON serialization for better performance
|
||||
orjson~=3.11
|
||||
@@ -151,10 +151,3 @@ blinker
|
||||
pytest-xdist
|
||||
|
||||
|
||||
litellm
|
||||
# pydantic-core >=2.41 imports typing_extensions.Sentinel, which is absent in the
|
||||
# system-installed typing_extensions on many Linux distros (e.g. Ubuntu 22/24).
|
||||
# When the system path leaks into sys.path before the venv, the system copy is
|
||||
# cached first and the import fails at runtime inside the LLM worker thread.
|
||||
pydantic-core<2.41
|
||||
pydantic<2.12
|
||||
|
||||
Reference in New Issue
Block a user