mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-05-03 08:10:37 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f273ef89ed |
@@ -66,27 +66,27 @@ jobs:
|
||||
echo ${{ github.ref }} > changedetectionio/tag.txt
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Docker Hub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
@@ -103,19 +103,11 @@ jobs:
|
||||
ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=raw,value=dev
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -136,10 +128,10 @@ jobs:
|
||||
echo "Release tag: ${{ github.event.release.tag_name }}"
|
||||
echo "Github ref: ${{ github.ref }}"
|
||||
echo "Github ref name: ${{ github.ref_name }}"
|
||||
|
||||
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta
|
||||
with:
|
||||
images: |
|
||||
@@ -150,20 +142,11 @@ jobs:
|
||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
||||
type=raw,value=latest
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
org.opencontainers.image.version=${{ github.event.release.tag_name }}
|
||||
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
|
||||
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
@@ -60,14 +60,14 @@ jobs:
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
|
||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
|
||||
@@ -52,13 +52,4 @@ jobs:
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.13'
|
||||
skip-pypuppeteer: true
|
||||
|
||||
|
||||
test-application-3-14:
|
||||
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.14'
|
||||
skip-pypuppeteer: false
|
||||
skip-pypuppeteer: true
|
||||
@@ -42,10 +42,10 @@ jobs:
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/test-changedetectionio.tar
|
||||
@@ -88,7 +88,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -99,7 +99,11 @@ jobs:
|
||||
|
||||
- name: Run Unit Tests
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
@@ -112,7 +116,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -161,14 +165,14 @@ jobs:
|
||||
|
||||
- name: Store test artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Store CLI test output
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||
path: cli-opts-output.txt
|
||||
@@ -184,7 +188,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -226,7 +230,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -266,7 +270,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -288,8 +292,8 @@ jobs:
|
||||
|
||||
- name: Specific tests in built container for Selenium
|
||||
run: |
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
|
||||
|
||||
|
||||
# SMTP tests
|
||||
smtp-tests:
|
||||
@@ -302,7 +306,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -330,7 +334,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -500,7 +504,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -540,7 +544,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -570,7 +574,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -583,10 +587,6 @@ jobs:
|
||||
run: |
|
||||
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
|
||||
|
||||
- name: Plugin get_html_head_extras hook injects into base.html
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_html_head_extras.py'
|
||||
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -598,7 +598,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -643,7 +643,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -706,19 +706,7 @@ jobs:
|
||||
- name: Check upgrade works without error
|
||||
run: |
|
||||
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
||||
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
|
||||
g++ \
|
||||
gcc \
|
||||
libc-dev \
|
||||
libffi-dev \
|
||||
libjpeg-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
make \
|
||||
patch \
|
||||
pkg-config \
|
||||
zlib1g-dev
|
||||
|
||||
|
||||
# Checkout old version and create datastore
|
||||
git checkout 0.49.1
|
||||
python3 -m venv .venv
|
||||
@@ -727,7 +715,7 @@ jobs:
|
||||
pip install 'pyOpenSSL>=23.2.0'
|
||||
|
||||
echo "=== Running version 0.49.1 to create datastore ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true python3 ./changedetection.py -C -d /tmp/data &
|
||||
python3 ./changedetection.py -C -d /tmp/data &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready
|
||||
@@ -775,7 +763,7 @@ jobs:
|
||||
pip install -r requirements.txt
|
||||
|
||||
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
||||
TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
||||
|
||||
echo "=== Upgrade test output ==="
|
||||
cat /tmp/upgrade-test.log
|
||||
@@ -783,7 +771,7 @@ jobs:
|
||||
|
||||
# Now start the current version normally to verify the tag survived
|
||||
echo "=== Starting current version to verify tag exists after upgrade ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
||||
timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready and fetch UI
|
||||
@@ -832,7 +820,7 @@ jobs:
|
||||
|
||||
- name: Upload upgrade test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/upgrade-test.log
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
[python: **.py]
|
||||
keywords = _ _l gettext
|
||||
keywords = _:1,_l:1,gettext:1
|
||||
|
||||
[jinja2: **/templates/**.html]
|
||||
encoding = utf-8
|
||||
keywords = _ _l gettext
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.9'
|
||||
__version__ = '0.53.3'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
@@ -10,6 +10,7 @@ from json.decoder import JSONDecodeError
|
||||
from loguru import logger
|
||||
import getopt
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import signal
|
||||
import threading
|
||||
@@ -60,22 +61,8 @@ import time
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||
try:
|
||||
import ctypes as _ctypes
|
||||
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
|
||||
@@ -154,10 +154,11 @@ class Import(Resource):
|
||||
if extras['processor'] not in available:
|
||||
return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
|
||||
|
||||
# Validate fetch_backend if provided (legacy API compat — still accepted, stored as-is)
|
||||
# Validate fetch_backend if provided
|
||||
if 'fetch_backend' in extras:
|
||||
from changedetectionio.content_fetchers import available_fetchers
|
||||
available = [f[0] for f in available_fetchers()]
|
||||
# Also allow 'system' and extra_browser_* patterns
|
||||
is_valid = (
|
||||
extras['fetch_backend'] == 'system' or
|
||||
extras['fetch_backend'] in available or
|
||||
@@ -166,14 +167,6 @@ class Import(Resource):
|
||||
if not is_valid:
|
||||
return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
|
||||
|
||||
# Validate browser_profile if provided
|
||||
if 'browser_profile' in extras:
|
||||
from changedetectionio.model.browser_profile import get_builtin_profiles, RESERVED_MACHINE_NAMES
|
||||
store_profiles = self.datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
known = set(get_builtin_profiles().keys()) | set(store_profiles.keys()) | {'system', None}
|
||||
if extras['browser_profile'] not in known:
|
||||
return f"Invalid browser_profile '{extras['browser_profile']}'. Available: {', '.join(str(k) for k in known)}", 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in extras:
|
||||
from wtforms import ValidationError
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
import functools
|
||||
from flask import make_response
|
||||
from flask_restful import Resource
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _get_spec_yaml():
|
||||
"""Build and cache the merged spec as a YAML string (only serialized once per process)."""
|
||||
import yaml
|
||||
from changedetectionio.api import build_merged_spec_dict
|
||||
return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
|
||||
|
||||
|
||||
class Spec(Resource):
|
||||
def get(self):
|
||||
"""Return the merged OpenAPI spec including all registered processor extensions."""
|
||||
return make_response(
|
||||
_get_spec_yaml(),
|
||||
200,
|
||||
{'Content-Type': 'application/yaml'}
|
||||
)
|
||||
@@ -17,7 +17,7 @@ class Tag(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single tag
|
||||
# curl http://localhost:5000/api/v1/tag/<uuid_str:uuid>
|
||||
# curl http://localhost:5000/api/v1/tag/<string:uuid>
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getTag')
|
||||
def get(self, uuid):
|
||||
@@ -85,9 +85,6 @@ class Tag(Resource):
|
||||
# Create clean tag dict without Watch-specific fields
|
||||
clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}
|
||||
|
||||
# fetch_backend is a legacy field superseded by browser_profile — omit from API response
|
||||
clean_tag.pop('fetch_backend', None)
|
||||
|
||||
return clean_tag
|
||||
|
||||
@auth.check_token
|
||||
@@ -100,6 +97,17 @@ class Tag(Resource):
|
||||
# Delete the tag, and any tag reference
|
||||
del self.datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
# Delete tag.json file if it exists
|
||||
import os
|
||||
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
logger.info(f"Deleted tag.json for tag {uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
|
||||
|
||||
# Remove tag from all watches
|
||||
for watch_uuid, watch in self.datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
@@ -180,13 +188,6 @@ class Tag(Resource):
|
||||
|
||||
new_uuid = self.datastore.add_tag(title=title)
|
||||
if new_uuid:
|
||||
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
||||
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
||||
if extra:
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
||||
if tag:
|
||||
tag.update(extra)
|
||||
tag.commit()
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported tag", 400
|
||||
|
||||
@@ -57,7 +57,7 @@ class Watch(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single watch, excluding the history list (can be large)
|
||||
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>
|
||||
# @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
|
||||
# ?recheck=true
|
||||
@auth.check_token
|
||||
@@ -105,9 +105,6 @@ class Watch(Resource):
|
||||
watch['viewed'] = watch_obj.viewed
|
||||
watch['link'] = watch_obj.link,
|
||||
|
||||
# fetch_backend is a legacy field superseded by browser_profile — omit from API response
|
||||
watch.pop('fetch_backend', None)
|
||||
|
||||
return watch
|
||||
|
||||
@auth.check_token
|
||||
@@ -220,7 +217,7 @@ class WatchHistory(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
# Get a list of available history for a watch by UUID
|
||||
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>/history
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>/history
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchHistory')
|
||||
def get(self, uuid):
|
||||
@@ -341,7 +338,7 @@ class WatchHistoryDiff(Resource):
|
||||
word_diff = True
|
||||
|
||||
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'false'))
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'true'))
|
||||
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
||||
include_removed = strtobool(request.args.get('removed', 'true'))
|
||||
include_added = strtobool(request.args.get('added', 'true'))
|
||||
@@ -352,7 +349,7 @@ class WatchHistoryDiff(Resource):
|
||||
previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
ignore_junk=ignore_whitespace,
|
||||
include_equal=not changes_only,
|
||||
include_equal=changes_only,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
@@ -570,4 +567,4 @@ class CreateWatch(Resource):
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
return list, 200
|
||||
return list, 200
|
||||
@@ -3,18 +3,29 @@ from flask import request, abort
|
||||
from loguru import logger
|
||||
|
||||
@functools.cache
|
||||
def build_merged_spec_dict():
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
import os
|
||||
import yaml # Lazy import - only loaded when API validation is actually used
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
# Possibly for pip3 packages
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
||||
return _openapi_spec
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
|
||||
Each processor can provide an api.yaml file alongside its __init__.py that defines
|
||||
additional schemas (e.g., processor_config_restock_diff). These are merged into
|
||||
WatchBase.properties so the spec accurately reflects what the API accepts.
|
||||
|
||||
Plugin processors (via pluggy) are also supported - they just need an api.yaml
|
||||
next to their processor module.
|
||||
|
||||
Returns the merged dict (cached - do not mutate the returned value).
|
||||
Used by Import endpoint to validate and convert query parameters.
|
||||
Returns the YAML dict directly (not the OpenAPI object).
|
||||
"""
|
||||
import os
|
||||
import yaml
|
||||
@@ -24,59 +35,7 @@ def build_merged_spec_dict():
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
|
||||
try:
|
||||
from changedetectionio.processors import find_processors, get_parent_module
|
||||
for module, proc_name in find_processors():
|
||||
parent = get_parent_module(module)
|
||||
if not parent or not hasattr(parent, '__file__'):
|
||||
continue
|
||||
api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
|
||||
if not os.path.exists(api_yaml_path):
|
||||
continue
|
||||
with open(api_yaml_path, 'r', encoding='utf-8') as f:
|
||||
proc_spec = yaml.safe_load(f)
|
||||
# Merge schemas
|
||||
proc_schemas = proc_spec.get('components', {}).get('schemas', {})
|
||||
spec_dict['components']['schemas'].update(proc_schemas)
|
||||
# Inject processor_config_{name} into WatchBase if the schema is defined
|
||||
schema_key = f'processor_config_{proc_name}'
|
||||
if schema_key in proc_schemas:
|
||||
spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
|
||||
'$ref': f'#/components/schemas/{schema_key}'
|
||||
}
|
||||
# Append x-code-samples from processor paths into existing path operations
|
||||
for path, path_item in proc_spec.get('paths', {}).items():
|
||||
if path not in spec_dict.get('paths', {}):
|
||||
continue
|
||||
for method, operation in path_item.items():
|
||||
if method not in spec_dict['paths'][path]:
|
||||
continue
|
||||
if 'x-code-samples' in operation:
|
||||
existing = spec_dict['paths'][path][method].get('x-code-samples', [])
|
||||
spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to merge processor API specs: {e}")
|
||||
|
||||
return spec_dict
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
return OpenAPI.from_dict(build_merged_spec_dict())
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
|
||||
Used by Import endpoint to validate and convert query parameters.
|
||||
Returns the merged YAML dict (not the OpenAPI object).
|
||||
"""
|
||||
return build_merged_spec_dict()
|
||||
return yaml.safe_load(f)
|
||||
|
||||
@functools.cache
|
||||
def _resolve_schema_properties(schema_name):
|
||||
@@ -191,6 +150,5 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
|
||||
from .Tags import Tags, Tag
|
||||
from .Import import Import
|
||||
from .SystemInfo import SystemInfo
|
||||
from .Spec import Spec
|
||||
from .Notifications import Notifications
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from loguru import logger
|
||||
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
|
||||
|
||||
|
||||
def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
def create_backup(datastore_path, watches: dict):
|
||||
logger.debug("Creating backup...")
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
@@ -40,14 +40,10 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||
for uuid, tag in (tags or {}).items():
|
||||
for f in Path(tag.data_dir).glob('*'):
|
||||
zipObj.write(f,
|
||||
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||
compress_type=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8)
|
||||
logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")
|
||||
# Add the flask app secret (if it exists)
|
||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||
if os.path.isfile(secret_file):
|
||||
zipObj.write(secret_file, arcname="secret.txt")
|
||||
|
||||
# Add any data in the watch data directory.
|
||||
for uuid, w in watches.items():
|
||||
@@ -92,28 +88,24 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
from .restore import construct_restore_blueprint
|
||||
|
||||
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
|
||||
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
|
||||
backup_threads = []
|
||||
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
def request_backup():
|
||||
if any(thread.is_alive() for thread in backup_threads):
|
||||
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
|
||||
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
# With immediate persistence, all data is already saved
|
||||
zip_thread = threading.Thread(
|
||||
target=create_backup,
|
||||
args=(datastore.datastore_path, datastore.data.get("watching")),
|
||||
kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
|
||||
daemon=True,
|
||||
name="BackupCreator"
|
||||
)
|
||||
@@ -121,7 +113,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
backup_threads.append(zip_thread)
|
||||
flash(gettext("Backup building in background, check back in a few minutes."))
|
||||
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
def find_backups():
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
@@ -141,43 +133,40 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return backup_info
|
||||
|
||||
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
|
||||
def download_backup(filename):
|
||||
import re
|
||||
filename = filename.strip()
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
||||
abort(404)
|
||||
|
||||
# Resolve 'latest' before any validation so checks run against the real filename.
|
||||
if filename == 'latest':
|
||||
backups = find_backups()
|
||||
if not backups:
|
||||
abort(404)
|
||||
filename = backups[0]['filename']
|
||||
|
||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
||||
abort(404)
|
||||
|
||||
logger.debug(f"Backup download request for '{full_path}'")
|
||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||
|
||||
@backups_blueprint.route("/", methods=['GET'])
|
||||
@backups_blueprint.route("/create", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def create():
|
||||
@backups_blueprint.route("", methods=['GET'])
|
||||
def index():
|
||||
backups = find_backups()
|
||||
output = render_template("backup_create.html",
|
||||
output = render_template("overview.html",
|
||||
available_backups=backups,
|
||||
backup_running=any(thread.is_alive() for thread in backup_threads)
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@backups_blueprint.route("/remove-backups", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/remove-backups", methods=['GET'])
|
||||
def remove_backups():
|
||||
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
@@ -187,6 +176,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
flash(gettext("Backups were deleted."))
|
||||
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
return backups_blueprint
|
||||
|
||||
@@ -1,248 +0,0 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
import threading
|
||||
import zipfile
|
||||
|
||||
from flask import Blueprint, render_template, flash, url_for, redirect, request
|
||||
from flask_babel import gettext, lazy_gettext as _l
|
||||
from wtforms import Form, BooleanField, SubmitField
|
||||
from flask_wtf.file import FileField, FileAllowed
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
|
||||
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
|
||||
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
|
||||
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
|
||||
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
|
||||
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
|
||||
_UUID_RE = re.compile(
|
||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
class RestoreForm(Form):
|
||||
zip_file = FileField(_l('Backup zip file'), validators=[
|
||||
FileAllowed(['zip'], _l('Must be a .zip backup file!'))
|
||||
])
|
||||
include_groups = BooleanField(_l('Include groups'), default=True)
|
||||
include_groups_replace_existing = BooleanField(_l('Replace existing groups of the same UUID'), default=True)
|
||||
include_watches = BooleanField(_l('Include watches'), default=True)
|
||||
include_watches_replace_existing = BooleanField(_l('Replace existing watches of the same UUID'), default=True)
|
||||
submit = SubmitField(_l('Restore backup'))
|
||||
|
||||
|
||||
def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace, include_watches, include_watches_replace):
|
||||
"""
|
||||
Extract and import watches and groups from a backup zip stream.
|
||||
|
||||
Mirrors the store's _load_watches / _load_tags loading pattern:
|
||||
- UUID dirs with tag.json → Tag.model + tag_obj.commit()
|
||||
- UUID dirs with watch.json → rehydrate_entity + watch_obj.commit()
|
||||
|
||||
Returns a dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.
|
||||
Raises zipfile.BadZipFile if the stream is not a valid zip.
|
||||
"""
|
||||
from changedetectionio.model import Tag
|
||||
|
||||
restored_groups = 0
|
||||
skipped_groups = 0
|
||||
restored_watches = 0
|
||||
skipped_watches = 0
|
||||
|
||||
current_tags = datastore.data['settings']['application'].get('tags', {})
|
||||
current_watches = datastore.data['watching']
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
logger.debug(f"Restore: extracting zip to {tmpdir}")
|
||||
with zipfile.ZipFile(zip_stream, 'r') as zf:
|
||||
total_uncompressed = sum(m.file_size for m in zf.infolist())
|
||||
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
|
||||
raise ValueError(
|
||||
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
|
||||
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
|
||||
)
|
||||
resolved_dest = os.path.realpath(tmpdir)
|
||||
for member in zf.infolist():
|
||||
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
|
||||
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
|
||||
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
|
||||
zf.extract(member, tmpdir)
|
||||
logger.debug("Restore: zip extracted, scanning UUID directories")
|
||||
|
||||
for entry in os.scandir(tmpdir):
|
||||
if not entry.is_dir():
|
||||
continue
|
||||
|
||||
uuid = entry.name
|
||||
if not _UUID_RE.match(uuid):
|
||||
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
|
||||
continue
|
||||
tag_json_path = os.path.join(entry.path, 'tag.json')
|
||||
watch_json_path = os.path.join(entry.path, 'watch.json')
|
||||
|
||||
# --- Tags (groups) ---
|
||||
if include_groups and os.path.exists(tag_json_path):
|
||||
if uuid in current_tags and not include_groups_replace:
|
||||
logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
|
||||
skipped_groups += 1
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(tag_json_path, 'r', encoding='utf-8') as f:
|
||||
tag_data = json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.error(f"Restore: failed to read tag.json for {uuid}: {e}")
|
||||
continue
|
||||
|
||||
title = tag_data.get('title', uuid)
|
||||
logger.debug(f"Restore: importing group '{title}' ({uuid})")
|
||||
|
||||
# Mirror _load_tags: set uuid and force processor
|
||||
tag_data['uuid'] = uuid
|
||||
tag_data['processor'] = 'restock_diff'
|
||||
|
||||
# Copy the UUID directory so data_dir exists for commit()
|
||||
dst_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
if os.path.exists(dst_dir):
|
||||
shutil.rmtree(dst_dir)
|
||||
shutil.copytree(entry.path, dst_dir)
|
||||
|
||||
tag_obj = Tag.model(
|
||||
datastore_path=datastore.datastore_path,
|
||||
__datastore=datastore.data,
|
||||
default=tag_data
|
||||
)
|
||||
current_tags[uuid] = tag_obj
|
||||
tag_obj.commit()
|
||||
restored_groups += 1
|
||||
logger.success(f"Restore: group '{title}' ({uuid}) restored")
|
||||
|
||||
# --- Watches ---
|
||||
elif include_watches and os.path.exists(watch_json_path):
|
||||
if uuid in current_watches and not include_watches_replace:
|
||||
logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
|
||||
skipped_watches += 1
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(watch_json_path, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.error(f"Restore: failed to read watch.json for {uuid}: {e}")
|
||||
continue
|
||||
|
||||
url = watch_data.get('url', uuid)
|
||||
logger.debug(f"Restore: importing watch '{url}' ({uuid})")
|
||||
|
||||
# Copy UUID directory first so data_dir and history files exist
|
||||
dst_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
if os.path.exists(dst_dir):
|
||||
shutil.rmtree(dst_dir)
|
||||
shutil.copytree(entry.path, dst_dir)
|
||||
|
||||
# Mirror _load_watches / rehydrate_entity
|
||||
watch_data['uuid'] = uuid
|
||||
watch_obj = datastore.rehydrate_entity(uuid, watch_data)
|
||||
current_watches[uuid] = watch_obj
|
||||
watch_obj.commit()
|
||||
restored_watches += 1
|
||||
logger.success(f"Restore: watch '{url}' ({uuid}) restored")
|
||||
|
||||
logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
|
||||
f"watches {restored_watches} restored / {skipped_watches} skipped")
|
||||
|
||||
# Persist changedetection.json (includes the updated tags dict)
|
||||
logger.debug("Restore: committing datastore settings")
|
||||
datastore.commit()
|
||||
|
||||
return {
|
||||
'restored_groups': restored_groups,
|
||||
'skipped_groups': skipped_groups,
|
||||
'restored_watches': restored_watches,
|
||||
'skipped_watches': skipped_watches,
|
||||
}
|
||||
|
||||
|
||||
|
||||
def construct_restore_blueprint(datastore):
|
||||
restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
|
||||
restore_threads = []
|
||||
|
||||
@restore_blueprint.route("/restore", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def restore():
|
||||
form = RestoreForm()
|
||||
return render_template("backup_restore.html",
|
||||
form=form,
|
||||
restore_running=any(t.is_alive() for t in restore_threads),
|
||||
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
|
||||
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
|
||||
|
||||
@restore_blueprint.route("/restore/start", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def backups_restore_start():
|
||||
if any(t.is_alive() for t in restore_threads):
|
||||
flash(gettext("A restore is already running, check back in a few minutes"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
zip_file = request.files.get('zip_file')
|
||||
if not zip_file or not zip_file.filename:
|
||||
flash(gettext("No file uploaded"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
if not zip_file.filename.lower().endswith('.zip'):
|
||||
flash(gettext("File must be a .zip backup file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Reject oversized uploads before reading the stream into memory.
|
||||
content_length = request.content_length
|
||||
if content_length and content_length > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Read into memory now — the request stream is gone once we return.
|
||||
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
|
||||
try:
|
||||
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
|
||||
if len(raw) > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
zip_bytes = io.BytesIO(raw)
|
||||
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
|
||||
pass
|
||||
zip_bytes.seek(0)
|
||||
except zipfile.BadZipFile:
|
||||
flash(gettext("Invalid or corrupted zip file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
include_groups = request.form.get('include_groups') == 'y'
|
||||
include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
|
||||
include_watches = request.form.get('include_watches') == 'y'
|
||||
include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'
|
||||
|
||||
restore_thread = threading.Thread(
|
||||
target=import_from_zip,
|
||||
kwargs={
|
||||
'zip_stream': zip_bytes,
|
||||
'datastore': datastore,
|
||||
'include_groups': include_groups,
|
||||
'include_groups_replace': include_groups_replace,
|
||||
'include_watches': include_watches,
|
||||
'include_watches_replace': include_watches_replace,
|
||||
},
|
||||
daemon=True,
|
||||
name="BackupRestore"
|
||||
)
|
||||
restore_thread.start()
|
||||
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
|
||||
restore_threads.append(restore_thread)
|
||||
flash(gettext("Restore started in background, check back in a few minutes."))
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
return restore_blueprint
|
||||
@@ -1,49 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab active" id=""><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<div id="general">
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<p>
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li>
|
||||
<a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary"
|
||||
href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error "
|
||||
href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -1,61 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||
<li class="tab active"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<div id="general">
|
||||
{% if restore_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A restore is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||
method="POST"
|
||||
enctype="multipart/form-data">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_groups) }}
|
||||
<span class="pure-form-message-inline">{{ _('Include all groups found in backup?') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_groups_replace_existing) }}
|
||||
<span class="pure-form-message-inline">{{ _('Replace any existing groups of the same UUID?') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_watches) }}
|
||||
<span class="pure-form-message-inline">{{ _('Include all watches found in backup?') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_watches_replace_existing) }}
|
||||
<span class="pure-form-message-inline">{{ _('Replace any existing watches of the same UUID?') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.zip_file) }}
|
||||
</div>
|
||||
|
||||
<div class="pure-controls">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Restore backup') }}</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,36 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h2>{{ _('Backups') }}</h2>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{% endblock %}
|
||||
@@ -102,35 +102,6 @@ def run_async_in_browser_loop(coro):
|
||||
else:
|
||||
raise RuntimeError("Browser steps event loop is not available")
|
||||
|
||||
async def _close_session_resources(session_data, label=''):
|
||||
"""Close all browser resources for a session in the correct order.
|
||||
|
||||
browserstepper.cleanup() closes page+context but not the browser itself.
|
||||
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
|
||||
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
|
||||
"""
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
await browserstepper.cleanup()
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up browserstepper{label}: {e}")
|
||||
|
||||
browser = session_data.get('browser')
|
||||
if browser:
|
||||
try:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser{label}: {e}")
|
||||
|
||||
playwright_context = session_data.get('playwright_context')
|
||||
if playwright_context:
|
||||
try:
|
||||
await playwright_context.stop()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error stopping playwright context{label}: {e}")
|
||||
|
||||
|
||||
def cleanup_expired_sessions():
|
||||
"""Remove expired browsersteps sessions and cleanup their resources"""
|
||||
global browsersteps_sessions, browsersteps_watch_to_session
|
||||
@@ -148,10 +119,13 @@ def cleanup_expired_sessions():
|
||||
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
|
||||
session_data = browsersteps_sessions[session_id]
|
||||
|
||||
try:
|
||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||
# Cleanup playwright resources asynchronously
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
run_async_in_browser_loop(browserstepper.cleanup())
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||
|
||||
# Remove from sessions dict
|
||||
del browsersteps_sessions[session_id]
|
||||
@@ -178,10 +152,12 @@ def cleanup_session_for_watch(watch_uuid):
|
||||
|
||||
session_data = browsersteps_sessions.get(session_id)
|
||||
if session_data:
|
||||
try:
|
||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
run_async_in_browser_loop(browserstepper.cleanup())
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||
|
||||
# Remove from sessions dict
|
||||
del browsersteps_sessions[session_id]
|
||||
@@ -202,69 +178,64 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
import time
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# We keep the playwright session open for many minutes
|
||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
|
||||
browsersteps_start_session = {'start_time': time.time()}
|
||||
|
||||
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
|
||||
proxy_url = datastore.get_proxy_url_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_url:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
# Create a new async playwright instance for browser steps
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
|
||||
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
|
||||
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
|
||||
watch = datastore.data['watching'][watch_uuid]
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_class = content_fetchers.get_fetcher(watch.effective_browser_profile.fetch_backend)
|
||||
|
||||
browser = None
|
||||
playwright_context = None
|
||||
|
||||
# If the fetcher has its own browser launch for the live steps UI, use it.
|
||||
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
|
||||
# or None to fall back to the default CDP path.
|
||||
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
|
||||
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
|
||||
if result is not None:
|
||||
browser, playwright_context = result
|
||||
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_class.__name__}'")
|
||||
|
||||
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
|
||||
if browser is None:
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if '?' not in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if not '?' in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
browsersteps_start_session['browser'] = browser
|
||||
browsersteps_start_session['playwright_context'] = playwright_context
|
||||
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||
if proxy_url:
|
||||
|
||||
# Playwright needs separate username and password values
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
|
||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||
browserstepper = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browser,
|
||||
proxy=proxy,
|
||||
start_url=watch.link,
|
||||
headers=watch.get('headers')
|
||||
start_url=datastore.data['watching'][watch_uuid].link,
|
||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||
)
|
||||
|
||||
# Initialize the async connection
|
||||
await browserstepper.connect(proxy=proxy)
|
||||
|
||||
browsersteps_start_session['browserstepper'] = browserstepper
|
||||
|
||||
# For test
|
||||
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||
|
||||
return browsersteps_start_session
|
||||
|
||||
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
def browsersteps_start_session():
|
||||
# A new session was requested, return sessionID
|
||||
import uuid
|
||||
@@ -299,8 +270,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logger.debug("Starting connection with playwright - done")
|
||||
return {'browsersteps_session_id': browsersteps_session_id}
|
||||
|
||||
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
|
||||
def browser_steps_fetch_screenshot_image():
|
||||
from flask import (
|
||||
make_response,
|
||||
@@ -325,8 +296,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)
|
||||
|
||||
# A request for an action was received
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
def browsersteps_ui_update():
|
||||
import base64
|
||||
|
||||
|
||||
@@ -40,14 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
contents = ''
|
||||
now = time.time()
|
||||
try:
|
||||
import asyncio
|
||||
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid
|
||||
)
|
||||
|
||||
update_handler.preferred_proxy_override = preferred_proxy
|
||||
asyncio.run(update_handler.call_browser())
|
||||
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
|
||||
# title, size is len contents not len xfer
|
||||
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 404:
|
||||
@@ -96,13 +94,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return results
|
||||
|
||||
@login_required
|
||||
@check_proxies_blueprint.route("/<uuid_str:uuid>/status", methods=['GET'])
|
||||
@check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
|
||||
def get_recheck_status(uuid):
|
||||
results = _recalc_check_status(uuid=uuid)
|
||||
return results
|
||||
|
||||
@login_required
|
||||
@check_proxies_blueprint.route("/<uuid_str:uuid>/start", methods=['GET'])
|
||||
@check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
|
||||
def start_check(uuid):
|
||||
|
||||
if not datastore.proxy_list:
|
||||
|
||||
@@ -160,7 +160,8 @@ class import_xlsx_wachete(Importer):
|
||||
flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error')
|
||||
return
|
||||
|
||||
for row_id, row in enumerate(wb.active.iter_rows(min_row=2), start=2):
|
||||
row_id = 2
|
||||
for row in wb.active.iter_rows(min_row=row_id):
|
||||
try:
|
||||
extras = {}
|
||||
data = {}
|
||||
@@ -174,9 +175,9 @@ class import_xlsx_wachete(Importer):
|
||||
dynamic_wachet = str(data.get('dynamic wachet', '')).strip().lower() # Convert bool to str to cover all cases
|
||||
# libreoffice and others can have it as =FALSE() =TRUE(), or bool(true)
|
||||
if 'true' in dynamic_wachet or dynamic_wachet == '1':
|
||||
extras['browser_profile'] = 'browser_chromeplaywright'
|
||||
extras['fetch_backend'] = 'html_webdriver'
|
||||
elif 'false' in dynamic_wachet or dynamic_wachet == '0':
|
||||
extras['browser_profile'] = 'direct_http_requests'
|
||||
extras['fetch_backend'] = 'html_requests'
|
||||
|
||||
if data.get('xpath'):
|
||||
# @todo split by || ?
|
||||
@@ -211,6 +212,8 @@ class import_xlsx_wachete(Importer):
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_id), 'error')
|
||||
else:
|
||||
row_id += 1
|
||||
|
||||
flash(gettext("{} imported from Wachete .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
|
||||
|
||||
@@ -238,10 +241,10 @@ class import_xlsx_custom(Importer):
|
||||
|
||||
# @todo cehck atleast 2 rows, same in other method
|
||||
from changedetectionio.forms import validate_url
|
||||
row_i = 0
|
||||
row_i = 1
|
||||
|
||||
try:
|
||||
for row_i, row in enumerate(wb.active.iter_rows(), start=1):
|
||||
for row in wb.active.iter_rows():
|
||||
url = None
|
||||
tags = None
|
||||
extras = {}
|
||||
@@ -292,5 +295,7 @@ class import_xlsx_custom(Importer):
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_i), 'error')
|
||||
else:
|
||||
row_i += 1
|
||||
|
||||
flash(gettext("{} imported from custom .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
|
||||
@@ -9,7 +9,6 @@
|
||||
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
|
||||
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
|
||||
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
|
||||
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -17,11 +16,6 @@
|
||||
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<div class="tab-pane-inner" id="url-list">
|
||||
|
||||
<p>
|
||||
{{ _('Restoring changedetection.io backups is in the') }}<a href="{{ url_for('backups.restore.restore') }}"> {{ _('backups section') }}</a>.
|
||||
<br>
|
||||
</p>
|
||||
<div class="pure-control-group">
|
||||
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
|
||||
<br>
|
||||
@@ -43,6 +37,9 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="distill-io">
|
||||
|
||||
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
|
||||
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
|
||||
@@ -52,6 +49,8 @@
|
||||
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
|
||||
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
||||
font-family:monospace;
|
||||
white-space: pre;
|
||||
@@ -115,7 +114,6 @@
|
||||
</div>
|
||||
</div>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -15,7 +15,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<uuid_str:uuid>/accept", methods=['GET'])
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
|
||||
def accept(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||
@@ -25,7 +25,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<uuid_str:uuid>/reject", methods=['GET'])
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
||||
def reject(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
||||
datastore.data['watching'][uuid].commit()
|
||||
|
||||
@@ -9,12 +9,11 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/watch/<uuid_str:uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
|
||||
def rss_single_watch(uuid):
|
||||
import time
|
||||
|
||||
from flask import make_response, request, Response
|
||||
from flask_babel import lazy_gettext as _l
|
||||
from flask import make_response, request
|
||||
from feedgen.feed import FeedGenerator
|
||||
from loguru import logger
|
||||
|
||||
@@ -43,12 +42,12 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
# Get the watch by UUID
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return Response(_l("Watch with UUID %(uuid)s not found", uuid=uuid), status=404, mimetype='text/plain')
|
||||
return f"Watch with UUID {uuid} not found", 404
|
||||
|
||||
# Check if watch has at least 2 history snapshots
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
return Response(_l("Watch %(uuid)s does not have enough history snapshots to show changes (need at least 2)", uuid=uuid), status=400, mimetype='text/plain')
|
||||
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
|
||||
|
||||
# Get the number of diffs to include (default: 5)
|
||||
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
|
||||
|
||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||
def rss_tag_feed(tag_uuid):
|
||||
|
||||
from flask import make_response, request, url_for
|
||||
|
||||
@@ -15,9 +15,6 @@ from changedetectionio.auth_decorator import login_optionally_required
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
settings_blueprint = Blueprint('settings', __name__, template_folder="templates")
|
||||
|
||||
from changedetectionio.blueprint.settings.browser_profile import construct_blueprint as construct_browser_profile_blueprint
|
||||
settings_blueprint.register_blueprint(construct_browser_profile_blueprint(datastore), url_prefix='/browsers')
|
||||
|
||||
@settings_blueprint.route("", methods=['GET', "POST"])
|
||||
@login_optionally_required
|
||||
def settings_page():
|
||||
|
||||
@@ -1,200 +0,0 @@
|
||||
import flask_login
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
settings_browser_profile_blueprint = Blueprint(
|
||||
'settings_browsers',
|
||||
__name__,
|
||||
template_folder="templates"
|
||||
)
|
||||
|
||||
def _render_index(browser_profile_form=None, editing_machine_name=None):
|
||||
from changedetectionio import forms
|
||||
from changedetectionio import content_fetchers as cf
|
||||
from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
|
||||
|
||||
# Only browser-capable fetchers are valid profile types
|
||||
fetcher_choices = cf.available_browser_fetchers()
|
||||
if browser_profile_form is None:
|
||||
browser_profile_form = forms.BrowserProfileForm()
|
||||
browser_profile_form.fetch_backend.choices = fetcher_choices
|
||||
|
||||
fetcher_supports_screenshots = {name: True for name, _ in fetcher_choices}
|
||||
fetcher_requires_connection_url = {name: True for name, cls in cf.FETCHERS.items()
|
||||
if getattr(cls, 'requires_connection_url', False)}
|
||||
|
||||
# Table shows default built-in profiles first, then user-created profiles
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
user_profiles = dict(cf.DEFAULT_BROWSER_PROFILES)
|
||||
for machine_name, raw in store_profiles.items():
|
||||
try:
|
||||
user_profiles[machine_name] = BrowserProfile(**raw) if isinstance(raw, dict) else raw
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
current_default = datastore.data['settings']['application'].get('browser_profile') or 'direct_http_requests'
|
||||
|
||||
return render_template(
|
||||
"browser_profiles.html",
|
||||
browser_profiles=user_profiles,
|
||||
browser_profile_form=browser_profile_form,
|
||||
reserved_browser_profile_names=RESERVED_MACHINE_NAMES,
|
||||
fetcher_choices=fetcher_choices,
|
||||
fetcher_supports_screenshots=fetcher_supports_screenshots,
|
||||
fetcher_requires_connection_url=fetcher_requires_connection_url,
|
||||
current_default_profile=current_default,
|
||||
editing_machine_name=editing_machine_name,
|
||||
)
|
||||
|
||||
@settings_browser_profile_blueprint.route("", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def index():
|
||||
return _render_index()
|
||||
|
||||
@settings_browser_profile_blueprint.route("/<string:machine_name>/edit", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def edit(machine_name):
|
||||
from changedetectionio import forms
|
||||
from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
|
||||
|
||||
if machine_name in RESERVED_MACHINE_NAMES:
|
||||
flash(gettext("Built-in browser profiles cannot be edited."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
raw = store_profiles.get(machine_name)
|
||||
if raw is None:
|
||||
flash(gettext("Browser profile not found."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
profile = BrowserProfile(**raw) if isinstance(raw, dict) else raw
|
||||
form = forms.BrowserProfileForm(data=profile.model_dump())
|
||||
return _render_index(browser_profile_form=form, editing_machine_name=machine_name)
|
||||
|
||||
@settings_browser_profile_blueprint.route("/save", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def save():
|
||||
from changedetectionio import forms
|
||||
from changedetectionio import content_fetchers as cf
|
||||
from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
|
||||
|
||||
fetcher_choices = [(name, desc) for name, desc in cf.available_fetchers()]
|
||||
browser_profile_form = forms.BrowserProfileForm(formdata=request.form)
|
||||
browser_profile_form.fetch_backend.choices = fetcher_choices
|
||||
|
||||
if not browser_profile_form.validate():
|
||||
flash(gettext("Browser profile error: {}").format(
|
||||
'; '.join(str(e) for errs in browser_profile_form.errors.values() for e in errs)
|
||||
), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
name = browser_profile_form.name.data.strip()
|
||||
machine_name = BrowserProfile.machine_name_from_str(name)
|
||||
|
||||
if machine_name in RESERVED_MACHINE_NAMES:
|
||||
flash(gettext("Cannot use reserved profile name '{}'. Please choose a different name.").format(name), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
original_machine_name = request.form.get('original_machine_name', '').strip()
|
||||
store_profiles = datastore.data['settings']['application'].setdefault('browser_profiles', {})
|
||||
|
||||
if machine_name != original_machine_name and machine_name in store_profiles:
|
||||
flash(gettext("A browser profile named '{}' already exists.").format(name), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
profile_data = {
|
||||
'name': name,
|
||||
'fetch_backend': browser_profile_form.fetch_backend.data,
|
||||
'browser_connection_url': browser_profile_form.browser_connection_url.data or None,
|
||||
'viewport_width': browser_profile_form.viewport_width.data or 1280,
|
||||
'viewport_height': browser_profile_form.viewport_height.data or 1000,
|
||||
'block_images': bool(browser_profile_form.block_images.data),
|
||||
'block_fonts': bool(browser_profile_form.block_fonts.data),
|
||||
'ignore_https_errors': bool(browser_profile_form.ignore_https_errors.data),
|
||||
'user_agent': browser_profile_form.user_agent.data or None,
|
||||
'locale': browser_profile_form.locale.data or None,
|
||||
'custom_headers': browser_profile_form.custom_headers.data or '',
|
||||
'is_builtin': False,
|
||||
}
|
||||
|
||||
try:
|
||||
BrowserProfile(**profile_data)
|
||||
except Exception as e:
|
||||
flash(gettext("Browser profile validation error: {}").format(str(e)), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
# Handle rename: remove old key, cascade-update watches and tags
|
||||
if original_machine_name and original_machine_name != machine_name and original_machine_name in store_profiles:
|
||||
del store_profiles[original_machine_name]
|
||||
for watch in datastore.data['watching'].values():
|
||||
if watch.get('browser_profile') == original_machine_name:
|
||||
watch['browser_profile'] = machine_name
|
||||
for tag in datastore.data.get('settings', {}).get('application', {}).get('tags', {}).values():
|
||||
if tag.get('browser_profile') == original_machine_name:
|
||||
tag['browser_profile'] = machine_name
|
||||
|
||||
store_profiles[machine_name] = profile_data
|
||||
datastore.commit()
|
||||
flash(gettext("Browser profile '{}' saved.").format(name), 'notice')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
@settings_browser_profile_blueprint.route("/<string:machine_name>/delete", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete(machine_name):
|
||||
from changedetectionio.model.browser_profile import RESERVED_MACHINE_NAMES
|
||||
|
||||
if machine_name in RESERVED_MACHINE_NAMES:
|
||||
flash(gettext("Built-in browser profiles cannot be deleted."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
if machine_name not in store_profiles:
|
||||
flash(gettext("Browser profile not found."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
raw = store_profiles[machine_name]
|
||||
profile_name = raw.get('name', machine_name) if isinstance(raw, dict) else machine_name
|
||||
|
||||
for watch in datastore.data['watching'].values():
|
||||
if watch.get('browser_profile') == machine_name:
|
||||
watch['browser_profile'] = None
|
||||
|
||||
for tag in datastore.data.get('settings', {}).get('application', {}).get('tags', {}).values():
|
||||
if tag.get('browser_profile') == machine_name:
|
||||
tag['browser_profile'] = None
|
||||
|
||||
if datastore.data['settings']['application'].get('browser_profile') == machine_name:
|
||||
datastore.data['settings']['application']['browser_profile'] = None
|
||||
|
||||
del store_profiles[machine_name]
|
||||
datastore.commit()
|
||||
flash(gettext("Browser profile '{}' deleted.").format(profile_name), 'notice')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
@settings_browser_profile_blueprint.route("/set-default", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def set_default():
|
||||
from changedetectionio import content_fetchers as cf
|
||||
|
||||
machine_name = request.form.get('machine_name', '').strip()
|
||||
if not machine_name:
|
||||
flash(gettext("No profile specified."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
from changedetectionio.model.browser_profile import get_profile
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
if get_profile(machine_name, store_profiles) is None:
|
||||
flash(gettext("Unknown browser profile '{}'.").format(machine_name), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
datastore.data['settings']['application']['browser_profile'] = machine_name
|
||||
datastore.commit()
|
||||
flash(gettext("Default browser profile set to '{}'.").format(machine_name), 'notice')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
return settings_browser_profile_blueprint
|
||||
@@ -1,163 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h2>{{ _('Browser Profiles') }}</h2>
|
||||
<p>{{ _('Create named profiles to configure browser settings — viewport size, connection URL, image/font blocking, and more. Each profile is based on an available browser type.') }}</p>
|
||||
|
||||
<form id="set-default-form" action="{{ url_for('settings.settings_browsers.set_default') }}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<input type="hidden" name="machine_name" id="default-machine-name" value="">
|
||||
</form>
|
||||
{% if browser_profiles %}
|
||||
<table class="pure-table pure-table-striped" style="width:100%; margin-bottom:1.5em;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="width:2.5em; text-align:center;" title="{{ _('System default') }}">{{ _('Default') }}</th>
|
||||
<th>{{ _('Name') }}</th>
|
||||
<th>{{ _('Type') }}</th>
|
||||
<th style="width:3em; text-align:center;"></th>
|
||||
<th>{{ _('Viewport') }}</th>
|
||||
<th>{{ _('Options') }}</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for machine_name, profile in browser_profiles.items() %}
|
||||
<tr>
|
||||
<td style="text-align:center;">
|
||||
<input type="radio"
|
||||
name="default_profile"
|
||||
value="{{ machine_name }}"
|
||||
title="{{ _('Set as system default') }}"
|
||||
{% if machine_name == current_default_profile %}checked{% endif %}
|
||||
onchange="setDefaultProfile('{{ machine_name }}')">
|
||||
</td>
|
||||
<td>{{ profile.name }}</td>
|
||||
<td><code>{{ profile.fetch_backend }}</code></td>
|
||||
<td style="text-align:center;">{{ profile.get_fetcher_class_name()|fetcher_status_icons }}</td>
|
||||
<td>{{ profile.viewport_width }}×{{ profile.viewport_height }}</td>
|
||||
<td style="font-size:0.8em; line-height:1.6;">
|
||||
{% if profile.block_images %}{{ _('No images') }}<br>{% endif %}
|
||||
{% if profile.block_fonts %}{{ _('No fonts') }}<br>{% endif %}
|
||||
{% if profile.ignore_https_errors %}{{ _('Ignore TLS') }}<br>{% endif %}
|
||||
{% if profile.browser_connection_url %}<span title="{{ profile.browser_connection_url }}">{{ _('Custom URL') }}</span>{% endif %}
|
||||
</td>
|
||||
<td style="white-space:nowrap;">
|
||||
{% if not profile.is_builtin %}
|
||||
<a href="{{ url_for('settings.settings_browsers.edit', machine_name=machine_name) }}"
|
||||
class="pure-button button-small">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('settings.settings_browsers.delete', machine_name=machine_name) }}"
|
||||
class="pure-button button-small button-error"
|
||||
onclick="return confirm('{{ _('Delete this browser profile?') }}')">{{ _('Delete') }}</a>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% else %}
|
||||
<p style="color:#888; font-style:italic;">{{ _('No browser profiles configured yet. Add one below.') }}</p>
|
||||
{% endif %}
|
||||
|
||||
<div class="border-fieldset">
|
||||
<h3 id="profile-form-heading">{{ _('Edit browser profile') if editing_machine_name else _('Add new browser profile') }}</h3>
|
||||
{% if not editing_machine_name %}
|
||||
<p style="font-size:0.9em; color:#666;">{{ _('Choose a browser type, give it a name, and configure its settings. You can create multiple profiles from the same type with different connection URLs or options.') }}</p>
|
||||
{% endif %}
|
||||
<form class="pure-form pure-form-stacked"
|
||||
id="browser-profile-form"
|
||||
action="{{ url_for('settings.settings_browsers.save') }}"
|
||||
method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<input type="hidden" name="original_machine_name" id="original_machine_name" value="{{ editing_machine_name or '' }}">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(browser_profile_form.name) }}
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(browser_profile_form.fetch_backend, id="profile-fetch-backend") }}
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field cdp-only-field">
|
||||
{{ render_field(browser_profile_form.browser_connection_url) }}
|
||||
<span class="pure-form-message-inline">{{ _('Optional — override the system CDP/WebSocket URL for this profile only (e.g.') }} <code>ws://my-chrome:3000</code>).</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field" style="display:flex; gap:1em; flex-wrap:wrap;">
|
||||
<div>{{ render_field(browser_profile_form.viewport_width) }}</div>
|
||||
<div>{{ render_field(browser_profile_form.viewport_height) }}</div>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_checkbox_field(browser_profile_form.block_images) }}
|
||||
<span class="pure-form-message-inline">{{ _('Block image downloads — speeds up loads on image-heavy pages.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_checkbox_field(browser_profile_form.block_fonts) }}
|
||||
<span class="pure-form-message-inline">{{ _('Block web font downloads.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_checkbox_field(browser_profile_form.ignore_https_errors) }}
|
||||
<span class="pure-form-message-inline">{{ _('Ignore TLS/HTTPS certificate errors (useful for self-signed certs on staging sites).') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_field(browser_profile_form.user_agent) }}
|
||||
<span class="pure-form-message-inline">{{ _("Leave blank to use the fetcher's default User-Agent.") }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_field(browser_profile_form.locale) }}
|
||||
<span class="pure-form-message-inline">{{ _('Sets Accept-Language and navigator.language (e.g. en-US, de-DE).') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(browser_profile_form.custom_headers) }}
|
||||
<span class="pure-form-message-inline">{{ _('Extra HTTP headers for all requests using this profile (one per line, Key: Value). Applied before per-watch headers.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<button type="submit" class="pure-button pure-button-primary" id="profile-submit-btn">{{ _('Save profile') }}</button>
|
||||
{% if editing_machine_name %}
|
||||
<a href="{{ url_for('settings.settings_browsers.index') }}" class="pure-button button-cancel">{{ _('Cancel') }}</a>
|
||||
{% endif %}
|
||||
<a href="{{ url_for('settings.settings_page') }}" class="pure-button button-cancel">{{ _('Back to Settings') }}</a>
|
||||
</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function setDefaultProfile(machineName) {
|
||||
document.getElementById('default-machine-name').value = machineName;
|
||||
document.getElementById('set-default-form').submit();
|
||||
}
|
||||
|
||||
const fetcherSupportsBrowser = {{ fetcher_supports_screenshots | tojson }};
|
||||
const fetcherRequiresConnectionUrl = {{ fetcher_requires_connection_url | tojson }};
|
||||
|
||||
function updateBrowserFieldVisibility() {
|
||||
const fetchBackend = document.getElementById('profile-fetch-backend').value;
|
||||
const isBrowser = !!fetcherSupportsBrowser[fetchBackend];
|
||||
const isCdp = !!fetcherRequiresConnectionUrl[fetchBackend];
|
||||
document.querySelectorAll('.browser-only-field').forEach(function(el) {
|
||||
el.style.display = isBrowser ? '' : 'none';
|
||||
});
|
||||
document.querySelectorAll('.cdp-only-field').forEach(function(el) {
|
||||
el.style.display = isCdp ? '' : 'none';
|
||||
});
|
||||
}
|
||||
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const sel = document.getElementById('profile-fetch-backend');
|
||||
if (sel) {
|
||||
sel.addEventListener('change', updateBrowserFieldVisibility);
|
||||
updateBrowserFieldVisibility();
|
||||
}
|
||||
});
|
||||
|
||||
{% if editing_machine_name %}
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
document.getElementById('browser-profile-form').scrollIntoView({behavior: 'smooth'});
|
||||
});
|
||||
{% endif %}
|
||||
</script>
|
||||
{% endblock %}
|
||||
@@ -25,10 +25,9 @@
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('settings.settings_browsers.index') }}">{{ _('Browsers') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
{% for tab in plugin_tabs %}
|
||||
<li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li>
|
||||
@@ -116,7 +115,14 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="fetching">
|
||||
<fieldset class="pure-group" id="webdriver-override-options">
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched sites don\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var') }} 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
|
||||
<br>
|
||||
@@ -140,9 +146,17 @@
|
||||
{{ render_field(form.requests.form.timeout) }}
|
||||
<span class="pure-form-message-inline">{{ _('For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.') }}</span><br>
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.requests.form.default_ua) }}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Applied to all requests.') }}<br><br>
|
||||
{{ _('Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it\'s important to consider') }} <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">{{ _('all of the ways that the browser is detected') }}</a>.
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -338,7 +352,7 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
|
||||
@@ -22,14 +22,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
|
||||
|
||||
from changedetectionio import processors
|
||||
output = render_template("groups-overview.html",
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
available_tags=sorted_tags,
|
||||
form=add_form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
tag_count=tag_count,
|
||||
wcag_text_color=processors.wcag_text_color,
|
||||
)
|
||||
|
||||
return output
|
||||
@@ -57,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/mute/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def mute(uuid):
|
||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
@@ -66,13 +63,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
tag.commit()
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete(uuid):
|
||||
# Delete the tag from settings immediately
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
del datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
# Delete tag.json file if it exists
|
||||
import os
|
||||
tag_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
logger.info(f"Deleted tag.json for tag {uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
|
||||
|
||||
# Remove tag from all watches in background thread to avoid blocking
|
||||
def remove_tag_background(tag_uuid):
|
||||
"""Background thread to remove tag from watches - discarded after completion."""
|
||||
@@ -93,7 +101,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Tag deleted, removing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/unlink/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/unlink/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def unlink(uuid):
|
||||
# Unlink tag from all watches in background thread to avoid blocking
|
||||
@@ -119,11 +127,19 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@tags_blueprint.route("/delete_all", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete_all():
|
||||
|
||||
# Delete all tag.json files
|
||||
import os
|
||||
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
|
||||
# TagsDict 'del' handler will remove the dir
|
||||
del datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")
|
||||
|
||||
# Clear all tags from settings immediately
|
||||
datastore.data['settings']['application']['tags'] = {}
|
||||
|
||||
# Clear tags from all watches in background thread to avoid blocking
|
||||
def clear_all_tags_background():
|
||||
@@ -144,7 +160,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("All tags deleted, clearing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_tag_edit(uuid):
|
||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||
@@ -163,21 +179,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
default_system_settings = datastore.data['settings'],
|
||||
)
|
||||
|
||||
# Bridge API-stored processor_config_* values into the form's FormField sub-forms.
|
||||
# The API stores processor_config_restock_diff in the tag dict; find the matching
|
||||
# FormField by checking which one's sub-fields cover the config keys.
|
||||
from wtforms.fields.form import FormField as WTFormField
|
||||
for key, value in default.items():
|
||||
if not key.startswith('processor_config_') or not isinstance(value, dict):
|
||||
continue
|
||||
for form_field in form:
|
||||
if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
|
||||
for sub_key, sub_value in value.items():
|
||||
sub_field = form_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
break
|
||||
|
||||
template_args = {
|
||||
'data': default,
|
||||
'form': form,
|
||||
@@ -211,17 +212,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
template = env.from_string(template_str)
|
||||
included_content = template.render(**template_args)
|
||||
|
||||
# Watches whose URL currently matches this tag's pattern
|
||||
matching_watches = {
|
||||
w_uuid: watch
|
||||
for w_uuid, watch in datastore.data['watching'].items()
|
||||
if default.matches_url(watch.get('url', ''))
|
||||
}
|
||||
|
||||
output = render_template("edit-tag.html",
|
||||
extra_form_content=included_content,
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
matching_watches=matching_watches,
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
**template_args
|
||||
)
|
||||
@@ -229,7 +222,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return output
|
||||
|
||||
|
||||
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['POST'])
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def form_tag_edit_submit(uuid):
|
||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||
@@ -262,4 +255,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
def form_tag_delete(uuid):
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
return tags_blueprint
|
||||
|
||||
@@ -10,11 +10,12 @@ from changedetectionio.processors.restock_diff.forms import processor_settings_f
|
||||
|
||||
class group_restock_settings_form(restock_settings_form):
|
||||
overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False)
|
||||
url_match_pattern = StringField('Auto-apply to watches with URLs matching',
|
||||
render_kw={"placeholder": "e.g. *://example.com/* or github.com/myorg"})
|
||||
tag_colour = StringField('Tag colour', default='')
|
||||
|
||||
class SingleTag(Form):
|
||||
|
||||
name = StringField('Tag name', [validators.InputRequired()], render_kw={"placeholder": "Name"})
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -43,46 +43,6 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.url_match_pattern, class="m-d") }}
|
||||
<span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
|
||||
</div>
|
||||
{% if matching_watches %}
|
||||
<div class="pure-control-group">
|
||||
<label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
|
||||
<ul class="tag-url-match-list">
|
||||
{% for w_uuid, w in matching_watches.items() %}
|
||||
<li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="pure-control-group">
|
||||
<label>{{ _('Tag colour') }}</label>
|
||||
<div style="display:flex; align-items:center; gap:0.75em;">
|
||||
<input type="checkbox" id="use_custom_colour"
|
||||
{% if data.get('tag_colour') %}checked{% endif %}>
|
||||
<label for="use_custom_colour" style="margin:0">{{ _('Custom colour') }}</label>
|
||||
<input type="color" id="tag_colour_picker"
|
||||
value="{{ data.get('tag_colour') or '#4f8ef7' }}"
|
||||
{% if not data.get('tag_colour') %}disabled{% endif %}>
|
||||
<input type="hidden" name="tag_colour" id="tag_colour_hidden"
|
||||
value="{{ data.get('tag_colour', '') }}">
|
||||
</div>
|
||||
<span class="pure-form-message-inline">{{ _('Leave unchecked to use the auto-generated colour based on the tag name.') }}</span>
|
||||
</div>
|
||||
<script>
|
||||
(function () {
|
||||
var cb = document.getElementById('use_custom_colour');
|
||||
var picker = document.getElementById('tag_colour_picker');
|
||||
var hidden = document.getElementById('tag_colour_hidden');
|
||||
picker.addEventListener('input', function () { hidden.value = this.value; });
|
||||
cb.addEventListener('change', function () {
|
||||
picker.disabled = !this.checked;
|
||||
hidden.value = this.checked ? picker.value : '';
|
||||
});
|
||||
})();
|
||||
</script>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -3,26 +3,6 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
<style>
|
||||
{%- for uuid, tag in available_tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- if tag.get('tag_colour') -%}
|
||||
.watch-tag-list.tag-{{ class_name }} { background-color: {{ tag.tag_colour }}; color: {{ wcag_text_color(tag.tag_colour) }}; }
|
||||
{%- else -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
|
||||
@@ -68,7 +48,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
||||
</td>
|
||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||
<td>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
|
||||
|
||||
@@ -141,7 +141,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
# Import the login decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@ui_blueprint.route("/clear_history/<uuid_str:uuid>", methods=['GET'])
|
||||
@ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def clear_watch_history(uuid):
|
||||
try:
|
||||
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def clear_all_history():
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext', '')
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
||||
if confirmtext == 'clear':
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
@@ -366,7 +366,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
|
||||
@ui_blueprint.route("/share-url/<uuid_str:uuid>", methods=['GET'])
|
||||
@ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_share_put_watch(uuid):
|
||||
"""Given a watch UUID, upload the info and return a share-link
|
||||
|
||||
@@ -66,7 +66,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return Markup(result)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page(uuid):
|
||||
"""
|
||||
@@ -128,7 +128,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_GET(uuid):
|
||||
"""
|
||||
@@ -182,7 +182,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['POST'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_POST(uuid):
|
||||
"""
|
||||
@@ -238,7 +238,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
|
||||
@@ -20,7 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
|
||||
return True
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
|
||||
@login_optionally_required
|
||||
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
||||
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
||||
@@ -67,10 +67,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
default['proxy'] = ''
|
||||
# proxy_override set to the json/text list of the items
|
||||
|
||||
# browser_profile: None means "use system default" — map to 'system' so the radio pre-selects correctly
|
||||
if not default.get('browser_profile'):
|
||||
default['browser_profile'] = 'system'
|
||||
|
||||
# Does it use some custom form? does one exist?
|
||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||
@@ -121,59 +117,19 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||
|
||||
if processor_config:
|
||||
from wtforms.fields.form import FormField
|
||||
# Populate processor-config-* fields from JSON
|
||||
for config_key, config_value in processor_config.items():
|
||||
if not isinstance(config_value, dict):
|
||||
continue
|
||||
# Try exact API-named field first (e.g., processor_config_restock_diff)
|
||||
target_field = getattr(form, f'processor_config_{config_key}', None)
|
||||
# Fallback: find any FormField sub-form whose fields cover config_value keys
|
||||
if target_field is None:
|
||||
for form_field in form:
|
||||
if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
|
||||
target_field = form_field
|
||||
break
|
||||
if target_field is not None:
|
||||
for sub_key, sub_value in config_value.items():
|
||||
sub_field = target_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
|
||||
field_name = f'processor_config_{config_key}'
|
||||
if hasattr(form, field_name):
|
||||
getattr(form, field_name).data = config_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load processor config: {e}")
|
||||
|
||||
from changedetectionio.model.browser_profile import BrowserProfile
|
||||
from changedetectionio import content_fetchers as cf
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
for p in datastore.extra_browsers:
|
||||
form.fetch_backend.choices.append(p)
|
||||
|
||||
# Resolve the name of the system-level default profile for the label
|
||||
from changedetectionio.model.browser_profile import get_profile
|
||||
_system_default_machine_name = datastore.data['settings']['application'].get('browser_profile') or 'direct_http_requests'
|
||||
_all_store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
_default_profile = get_profile(_system_default_machine_name, _all_store_profiles)
|
||||
if _default_profile:
|
||||
_system_label = gettext('System settings default') + ' \u2013 ' + _default_profile.name
|
||||
else:
|
||||
_system_label = gettext('System settings default')
|
||||
|
||||
# Choices: system default + always-present defaults (requests) + user-created profiles
|
||||
form.browser_profile.choices = [('system', _system_label)] + [
|
||||
(p.get_machine_name(), p.name)
|
||||
for p in cf.DEFAULT_BROWSER_PROFILES.values()
|
||||
] + [
|
||||
(machine_name, raw.get('name', machine_name) if isinstance(raw, dict) else getattr(raw, 'name', machine_name))
|
||||
for machine_name, raw in store_profiles.items()
|
||||
]
|
||||
|
||||
# Build a map of machine_name → fetcher class name for the JS visibility system
|
||||
all_profiles = dict(cf.DEFAULT_BROWSER_PROFILES)
|
||||
for machine_name, raw in store_profiles.items():
|
||||
try:
|
||||
all_profiles[machine_name] = BrowserProfile(**raw) if isinstance(raw, dict) else raw
|
||||
except Exception:
|
||||
pass
|
||||
browser_profile_fetchers = {mn: p.get_fetcher_class_name() for mn, p in all_profiles.items()}
|
||||
form.fetch_backend.choices.append(("system", 'System settings default'))
|
||||
|
||||
# form.browser_steps[0] can be assumed that we 'goto url' first
|
||||
|
||||
@@ -241,7 +197,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Recast it if need be to right data Watch handler
|
||||
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore, default=datastore.data['watching'][uuid])
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore.data, default=datastore.data['watching'][uuid])
|
||||
|
||||
# Save the watch immediately
|
||||
datastore.data['watching'][uuid].commit()
|
||||
@@ -327,7 +283,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
template_args = {
|
||||
'available_processors': processors.available_processors(),
|
||||
'available_timezones': sorted(available_timezones()),
|
||||
'browser_profile_fetchers': browser_profile_fetchers,
|
||||
'browser_steps_config': browser_step_ui_config,
|
||||
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
'extra_classes': ' '.join(c),
|
||||
@@ -352,12 +307,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch,
|
||||
'capabilities': capabilities,
|
||||
'auto_applied_tags': {
|
||||
tag_uuid: tag
|
||||
for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
|
||||
if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
|
||||
},
|
||||
'capabilities': capabilities
|
||||
}
|
||||
|
||||
included_content = None
|
||||
@@ -377,7 +327,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
return output
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-html", methods=['GET'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/get-html", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_get_latest_html(uuid):
|
||||
from io import BytesIO
|
||||
@@ -404,7 +354,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Return a 500 error
|
||||
abort(500)
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-data-package", methods=['GET'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/get-data-package", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_get_data_package(uuid):
|
||||
"""Download all data for a single watch as a zip file"""
|
||||
@@ -455,7 +405,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
mimetype='application/zip')
|
||||
|
||||
# Ajax callback
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def watch_get_preview_rendered(uuid):
|
||||
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||
|
||||
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
"""
|
||||
@@ -60,8 +59,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
versions = []
|
||||
timestamp = None
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
fetcher_supports_screenshots = watch.fetcher_supports_screenshots
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
triggered_line_numbers = []
|
||||
ignored_line_numbers = []
|
||||
@@ -71,9 +74,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
||||
|
||||
|
||||
preferred_version = request.args.get('version')
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
@@ -112,7 +113,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
highlight_triggered_line_numbers=triggered_line_numbers,
|
||||
highlight_blocked_line_numbers=blocked_line_numbers,
|
||||
history_n=watch.history_n,
|
||||
fetcher_supports_screenshots=fetcher_supports_screenshots,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
@@ -124,7 +125,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return output
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
|
||||
@@ -143,7 +143,7 @@
|
||||
<div class="tip">
|
||||
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
|
||||
</div>
|
||||
{% if fetcher_supports_screenshots %}
|
||||
{% if is_html_webdriver %}
|
||||
{% if screenshot %}
|
||||
<div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}" >
|
||||
|
||||
@@ -27,8 +27,7 @@
|
||||
const proxy_recheck_status_url="{{url_for('check_proxies.get_recheck_status', uuid=uuid)}}";
|
||||
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
|
||||
const default_system_fetch_backend = {{ (browser_profile_fetchers.get(settings_application.get('browser_profile') or 'direct_http_requests', 'requests')) | tojson }};
|
||||
const browserProfileFetcherMap = {{ browser_profile_fetchers | tojson }};
|
||||
const default_system_fetch_backend="{{ settings_application['fetch_backend'] }}";
|
||||
</script>
|
||||
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
|
||||
@@ -82,14 +81,6 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
|
||||
{% if auto_applied_tags %}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Also automatically applied by URL pattern:') }}
|
||||
{% for tag_uuid, tag in auto_applied_tags.items() %}
|
||||
<a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
|
||||
{% endfor %}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
@@ -140,19 +131,11 @@
|
||||
{% if capabilities.supports_request_type %}
|
||||
<div class="tab-pane-inner" id="request">
|
||||
<div class="pure-control-group inline-radio">
|
||||
<div><label for="browser_profile">{{ form.browser_profile.label.text }}</label></div>
|
||||
<div><ul class="fetch-backend" id="browser_profile">
|
||||
{%- for subfield in form.browser_profile %}
|
||||
<li>
|
||||
{{ subfield() }}
|
||||
{{ browser_profile_fetchers.get(subfield.data, '')|fetcher_status_icons }}
|
||||
<label for="{{ subfield.id }}">{{ subfield.label.text }}</label>
|
||||
</li>
|
||||
{%- endfor %}
|
||||
</ul></div>
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>{{ _('Choose how this watch fetches its target URL. \'System settings default\' inherits the global setting.') }}</p>
|
||||
<p>{{ _('Manage browser profiles in') }} <a href="{{ url_for('settings.settings_browsers.index') }}">{{ _('Settings → Browsers') }}</a>.</p>
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched site doesn\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.') }} </p>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
</span>
|
||||
</div>
|
||||
{% if form.proxy %}
|
||||
@@ -166,7 +149,7 @@
|
||||
{% endif %}
|
||||
|
||||
<!-- webdriver always -->
|
||||
<fieldset data-visible-for="fetch_backend=playwright fetch_backend=selenium fetch_backend=puppeteer fetch_backend=cloakbrowser" style="display: none;">
|
||||
<fieldset data-visible-for="fetch_backend=html_webdriver" style="display: none;">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.webdriver_delay) }}
|
||||
<div class="pure-form-message-inline">
|
||||
@@ -189,8 +172,8 @@
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
<!-- requests always -->
|
||||
<fieldset data-visible-for="fetch_backend=requests">
|
||||
<!-- html requests always -->
|
||||
<fieldset data-visible-for="fetch_backend=html_requests">
|
||||
<div class="pure-control-group">
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a>
|
||||
</div>
|
||||
@@ -227,7 +210,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
({{ _('Not supported by Selenium browser') }})
|
||||
</div>
|
||||
</div>
|
||||
<fieldset data-visible-for="fetch_backend=requests fetch_backend=playwright fetch_backend=selenium fetch_backend=puppeteer fetch_backend=cloakbrowser" >
|
||||
<fieldset data-visible-for="fetch_backend=html_requests fetch_backend=html_webdriver" >
|
||||
<div class="pure-control-group inline-radio advanced-options" style="display: none;">
|
||||
{{ render_checkbox_field(form.ignore_status_codes) }}
|
||||
</div>
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
@@ -28,7 +28,6 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
|
||||
@@ -81,7 +81,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
proxy_list = datastore.proxy_list
|
||||
output = render_template(
|
||||
"watch-overview.html",
|
||||
active_tag=active_tag,
|
||||
@@ -92,9 +91,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
extra_classes='has-queue' if not update_q.empty() else '',
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
wcag_text_color=processors.wcag_text_color,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=proxy_list,
|
||||
has_proxies=datastore.proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
@@ -106,22 +104,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
search_q=request.args.get('q', '').strip(),
|
||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||
system_default_fetcher=datastore.data['settings']['application'].get('browser_profile'),
|
||||
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||
tags=sorted_tags,
|
||||
unread_changes_count=datastore.unread_changes_count,
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
# Return freed template-building memory to the OS immediately.
|
||||
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if session.get('share-link'):
|
||||
del (session['share-link'])
|
||||
|
||||
|
||||
@@ -71,13 +71,6 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
{%- for uuid, tag in tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- if tag.get('tag_colour') -%}
|
||||
.button-tag.tag-{{ class_name }},
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ tag.tag_colour }};
|
||||
color: {{ wcag_text_color(tag.tag_colour) }};
|
||||
}
|
||||
{%- else -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.button-tag.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
@@ -99,7 +92,6 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
<div class="box" id="form-quick-watch-add">
|
||||
@@ -221,13 +213,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- set checking_now = is_checking_now(watch) -%}
|
||||
{%- set history_n = watch.history_n -%}
|
||||
{%- set favicon = watch.get_favicon_filename() -%}
|
||||
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||
{%- set row_classes = [
|
||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||
'processor-' ~ watch['processor'],
|
||||
'has-error' if error_texts|length > 2 else '',
|
||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||
'unviewed' if watch.has_unviewed else '',
|
||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||
@@ -280,7 +271,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
@@ -293,7 +284,10 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||
{{ watch.effective_browser_profile.get_fetcher_class_name()|fetcher_status_icons }}
|
||||
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
|
||||
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
|
||||
{{ effective_fetcher|fetcher_status_icons }}
|
||||
{%- endif -%}
|
||||
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
|
||||
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
|
||||
|
||||
@@ -311,20 +305,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock'].get('price') -%}
|
||||
{%- set restock = watch['restock'] -%}
|
||||
{%- set price = restock.get('price') -%}
|
||||
{%- set cur = restock.get('currency','') -%}
|
||||
|
||||
{%- if price is not none and (price|string)|regex_search('\d') -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
|
||||
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
|
||||
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
|
||||
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
|
||||
{{ price }} {{ cur }}<!-- as string -->
|
||||
{%- if watch['restock']['price'] is number -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- else -%} <!-- watch['restock']['price']' is not a number, cant output it -->
|
||||
{%- endif -%}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
{%- endif -%}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import sys
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from loguru import logger
|
||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
|
||||
import os
|
||||
@@ -24,71 +25,87 @@ SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_
|
||||
# Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000))
|
||||
|
||||
# available_fetchers() will scan this implementation looking for anything starting with html_
|
||||
# this information is used in the form selections
|
||||
from changedetectionio.content_fetchers.requests import fetcher as html_requests
|
||||
|
||||
|
||||
import importlib.resources
|
||||
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
|
||||
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
||||
FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8')
|
||||
|
||||
|
||||
# Registry: clean fetcher name → fetcher class (e.g. 'requests', 'playwright', 'cloakbrowser')
|
||||
FETCHERS: dict = {}
|
||||
|
||||
|
||||
def register_fetcher(name: str, cls) -> None:
|
||||
"""Register a fetcher class under its clean name (no html_ prefix)."""
|
||||
FETCHERS[name] = cls
|
||||
|
||||
|
||||
def get_fetcher(name: str):
|
||||
"""Return the fetcher class for a clean name, or None."""
|
||||
return FETCHERS.get(name)
|
||||
|
||||
|
||||
def available_fetchers():
|
||||
"""Return list of (name, description) for all registered fetchers."""
|
||||
return [(name, cls.fetcher_description) for name, cls in FETCHERS.items()
|
||||
if hasattr(cls, 'fetcher_description')]
|
||||
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
||||
import inspect
|
||||
p = []
|
||||
|
||||
# Get built-in fetchers (but skip plugin fetchers that were added via setattr)
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
|
||||
if inspect.isclass(obj):
|
||||
# @todo html_ is maybe better as fetcher_ or something
|
||||
# In this case, make sure to edit the default one in store.py and fetch_site_status.py
|
||||
if name.startswith('html_'):
|
||||
# Skip plugin fetchers that were already registered
|
||||
if name not in _plugin_fetchers:
|
||||
t = tuple([name, obj.fetcher_description])
|
||||
p.append(t)
|
||||
|
||||
# Get plugin fetchers from cache (already loaded at module init)
|
||||
for name, fetcher_class in _plugin_fetchers.items():
|
||||
if hasattr(fetcher_class, 'fetcher_description'):
|
||||
t = tuple([name, fetcher_class.fetcher_description])
|
||||
p.append(t)
|
||||
else:
|
||||
logger.warning(f"Plugin fetcher '{name}' does not have fetcher_description attribute")
|
||||
|
||||
return p
|
||||
|
||||
|
||||
def available_browser_fetchers():
|
||||
"""Return list of (name, description) for fetchers that support screenshots (browser-type fetchers)."""
|
||||
return [(name, cls.fetcher_description) for name, cls in FETCHERS.items()
|
||||
if cls.supports_screenshots]
|
||||
def get_plugin_fetchers():
|
||||
"""Load and return all plugin fetchers from the centralized plugin manager."""
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
|
||||
|
||||
def _load_fetchers():
|
||||
"""Load all fetchers (built-ins + plugins) into the FETCHERS registry."""
|
||||
from changedetectionio.pluggy_interface import plugin_manager, register_builtin_fetchers
|
||||
|
||||
# Built-ins must be registered first
|
||||
register_builtin_fetchers()
|
||||
|
||||
# Then external plugins
|
||||
fetchers = {}
|
||||
try:
|
||||
# Call the register_content_fetcher hook from all registered plugins
|
||||
results = plugin_manager.hook.register_content_fetcher()
|
||||
for result in results:
|
||||
if result:
|
||||
name, fetcher_class = result
|
||||
register_fetcher(name, fetcher_class)
|
||||
logger.info(f"Registered fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', '?')}")
|
||||
fetchers[name] = fetcher_class
|
||||
# Register in current module so hasattr() checks work
|
||||
setattr(sys.modules[__name__], name, fetcher_class)
|
||||
logger.info(f"Registered plugin fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', 'No description')}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading plugin fetchers: {e}")
|
||||
|
||||
return fetchers
|
||||
|
||||
|
||||
# Default browser profiles always shown in the browser profiles table (keyed by machine name)
|
||||
DEFAULT_BROWSER_PROFILES: dict = {}
|
||||
# Initialize plugins at module load time
|
||||
_plugin_fetchers = get_plugin_fetchers()
|
||||
|
||||
|
||||
def _register_default_browser_profiles():
|
||||
"""Register browser profiles that are always present in the profiles table."""
|
||||
from changedetectionio.model.browser_profile import BUILTIN_REQUESTS
|
||||
DEFAULT_BROWSER_PROFILES[BUILTIN_REQUESTS.get_machine_name()] = BUILTIN_REQUESTS
|
||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
|
||||
# rather than site-specific.
|
||||
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||
if use_playwright_as_chrome_fetcher:
|
||||
# @note - For now, browser steps always uses playwright
|
||||
if not strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
|
||||
logger.debug('Using Playwright library as fetcher')
|
||||
from .playwright import fetcher as html_webdriver
|
||||
else:
|
||||
logger.debug('Using direct Python Puppeteer library as fetcher')
|
||||
from .puppeteer import fetcher as html_webdriver
|
||||
|
||||
else:
|
||||
logger.debug("Falling back to selenium as fetcher")
|
||||
from .webdriver_selenium import fetcher as html_webdriver
|
||||
|
||||
|
||||
# Populate the registry at module load time
|
||||
_load_fetchers()
|
||||
|
||||
|
||||
_register_default_browser_profiles()
|
||||
# Register built-in fetchers as plugins after all imports are complete
|
||||
from changedetectionio.pluggy_interface import register_builtin_fetchers
|
||||
register_builtin_fetchers()
|
||||
|
||||
|
||||
@@ -70,41 +70,37 @@ class Fetcher():
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
# Icon shown in the watch list when this fetcher is the effective fetcher.
|
||||
# Set to a dict with 'filename', 'alt', 'title' keys (image served from static/images/).
|
||||
# None means no icon is shown (e.g. plain HTTP requests fetcher).
|
||||
status_icon = None
|
||||
|
||||
# Screenshot element locking - prevents layout shifts during screenshot capture
|
||||
# Only needed for visual comparison (image_ssim_diff processor)
|
||||
# Locks element dimensions in the first viewport to prevent headers/ads from resizing
|
||||
lock_viewport_elements = False # Default: disabled for performance
|
||||
|
||||
# BrowserProfile-derived settings — applied by browser fetchers, ignored by html_requests
|
||||
viewport_width: int = 1280
|
||||
viewport_height: int = 1000
|
||||
block_images: bool = False
|
||||
block_fonts: bool = False
|
||||
profile_user_agent: str = None # Profile-level UA; lower priority than request_headers User-Agent
|
||||
ignore_https_errors: bool = False
|
||||
locale: str = None
|
||||
service_workers: str = 'allow'
|
||||
extra_delay: int = 0
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
# Allow lock_viewport_elements to be set via kwargs
|
||||
if kwargs and 'lock_viewport_elements' in kwargs:
|
||||
self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
|
||||
|
||||
# BrowserProfile fields — store whatever was passed, subclasses use them
|
||||
for field in ('viewport_width', 'viewport_height', 'block_images', 'block_fonts',
|
||||
'profile_user_agent', 'ignore_https_errors', 'locale',
|
||||
'service_workers', 'extra_delay'):
|
||||
if field in kwargs:
|
||||
setattr(self, field, kwargs[field])
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return data for status icon to display in the watch overview.
|
||||
|
||||
This method can be overridden by subclasses to provide custom status icons.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with icon data:
|
||||
{
|
||||
'filename': 'icon-name.svg', # Icon filename
|
||||
'alt': 'Alt text', # Alt attribute
|
||||
'title': 'Tooltip text', # Title attribute
|
||||
'style': 'height: 1em;' # Optional inline CSS
|
||||
}
|
||||
Or None if no icon
|
||||
"""
|
||||
return None
|
||||
|
||||
def clear_content(self):
|
||||
"""
|
||||
@@ -202,16 +198,6 @@ class Fetcher():
|
||||
# Stop processing here
|
||||
raise BrowserStepsStepException(step_n=step_n, original_e=e)
|
||||
|
||||
def disk_cleanup_after_fetch(self):
|
||||
"""Remove any temporary files written to disk during a fetch.
|
||||
|
||||
The default implementation is a no-op. Browser-based fetchers
|
||||
override this to delete browser-step screenshots and any other
|
||||
ephemeral files they create. Called by the processor after
|
||||
``quit()`` regardless of whether the fetch succeeded or failed.
|
||||
"""
|
||||
pass
|
||||
|
||||
# It's always good to reset these
|
||||
def delete_browser_steps_screenshots(self):
|
||||
import glob
|
||||
|
||||
@@ -49,9 +49,6 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
"""
|
||||
Playwright CDP fetcher — connects to a remote browser via Chrome DevTools Protocol.
|
||||
|
||||
browser_connection_url must be supplied via the resolved BrowserProfile
|
||||
(set by preconfigure_browser_profiles_based_on_env at startup or edited in the UI).
|
||||
"""
|
||||
from loguru import logger
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
|
||||
|
||||
|
||||
class fetcher(PlaywrightBaseFetcher):
|
||||
fetcher_description = "Playwright Chrome (CDP/Remote)"
|
||||
requires_connection_url = True
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(proxy_override=proxy_override, custom_browser_connection_url=custom_browser_connection_url, **kwargs)
|
||||
|
||||
if custom_browser_connection_url:
|
||||
self.browser_connection_is_custom = True
|
||||
self.browser_connection_url = custom_browser_connection_url
|
||||
else:
|
||||
logger.critical("Playwright CDP fetcher has no browser_connection_url — browser profile was not configured. "
|
||||
"Set PLAYWRIGHT_DRIVER_URL or configure a browser profile in Settings.")
|
||||
self.browser_connection_url = None
|
||||
|
||||
# CDP always connects to Chromium
|
||||
self.browser_type = 'chromium'
|
||||
|
||||
async def _connect_browser(self, p):
|
||||
browser_type = getattr(p, self.browser_type)
|
||||
return await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60_000)
|
||||
|
||||
|
||||
class PlaywrightCDPPlugin:
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
return ('playwright_cdp', fetcher)
|
||||
|
||||
|
||||
cdp_plugin = PlaywrightCDPPlugin()
|
||||
@@ -1,403 +0,0 @@
|
||||
"""
|
||||
Playwright-based content fetchers.
|
||||
|
||||
Submodules:
|
||||
cdp — connect to a remote browser via Chrome DevTools Protocol (CDP/WebSocket)
|
||||
chrome — launch a local Chromium browser
|
||||
firefox — launch a local Firefox browser
|
||||
webkit — launch a local WebKit (Safari-engine) browser
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import (
|
||||
SCREENSHOT_MAX_HEIGHT_DEFAULT,
|
||||
SCREENSHOT_MAX_TOTAL_HEIGHT,
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD,
|
||||
FAVICON_FETCHER_JS,
|
||||
INSTOCK_DATA_JS,
|
||||
XPATH_ELEMENT_JS,
|
||||
visualselector_xpath_selectors,
|
||||
)
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import (
|
||||
BrowserStepsStepException,
|
||||
EmptyReply,
|
||||
Non200ErrorCodeReceived,
|
||||
PageUnloadable,
|
||||
ScreenshotUnavailable,
|
||||
)
|
||||
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport_size
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
if lock_viewport_elements and page_height > page.viewport_size['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), '..', 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled (took {time.time() - lock_start:.2f}s)")
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
if y > 0:
|
||||
await page.evaluate(f"window.scrollTo(0, {y})")
|
||||
|
||||
await _safe_request_gc(page)
|
||||
|
||||
screenshot_kwargs = {'type': screenshot_type, 'full_page': False}
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
|
||||
if elements_locked:
|
||||
unlock_elements_js_path = os.path.join(os.path.dirname(__file__), '..', 'res', 'unlock-elements-sizing.js')
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
stitch_start = time.time()
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px | "
|
||||
f"Setup: {total_time - capture_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot_chunks[0]
|
||||
|
||||
|
||||
async def _safe_request_gc(page):
|
||||
"""Request browser GC — Chromium-specific, silently ignored on Firefox/WebKit."""
|
||||
try:
|
||||
await page.request_gc()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
class PlaywrightBaseFetcher(Fetcher):
|
||||
"""
|
||||
Shared base for all Playwright fetchers.
|
||||
|
||||
Subclasses implement ``_connect_browser(playwright_instance)`` to return a
|
||||
connected-or-launched browser object. Everything else — context creation,
|
||||
page interaction, screenshot capture, browser-steps execution — lives here.
|
||||
"""
|
||||
|
||||
playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
|
||||
|
||||
proxy = None
|
||||
|
||||
# Capability flags
|
||||
supports_browser_steps = True
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
# Subclasses may use this (e.g. CDP); others ignore it
|
||||
self._custom_browser_connection_url = custom_browser_connection_url
|
||||
|
||||
proxy_args = {}
|
||||
for k in self.playwright_proxy_settings_mappings:
|
||||
v = os.getenv('playwright_proxy_' + k, False)
|
||||
if v:
|
||||
proxy_args[k] = v.strip('"')
|
||||
|
||||
if proxy_args:
|
||||
self.proxy = proxy_args
|
||||
|
||||
if proxy_override:
|
||||
self.proxy = {'server': proxy_override}
|
||||
|
||||
if self.proxy:
|
||||
parsed = urlparse(self.proxy.get('server', ''))
|
||||
if parsed.username:
|
||||
self.proxy['username'] = parsed.username
|
||||
self.proxy['password'] = parsed.password
|
||||
|
||||
def disk_cleanup_after_fetch(self):
|
||||
"""Delete browser-step screenshots written during this fetch."""
|
||||
self.delete_browser_steps_screenshots()
|
||||
|
||||
async def _connect_browser(self, playwright_instance):
|
||||
"""Return an open browser object. Must be overridden by each subclass."""
|
||||
raise NotImplementedError(f"{type(self).__name__} must implement _connect_browser()")
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
watch_uuid = getattr(self, 'watch_uuid', None)
|
||||
screenshot = await capture_full_page_async(
|
||||
page=self.page,
|
||||
screenshot_format=self.screenshot_format,
|
||||
watch_uuid=watch_uuid,
|
||||
lock_viewport_elements=self.lock_viewport_elements,
|
||||
)
|
||||
await _safe_request_gc(self.page)
|
||||
|
||||
if self.browser_steps_screenshot_path is not None:
|
||||
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
|
||||
logger.debug(f"Saving step screenshot to {destination}")
|
||||
with open(destination, 'wb') as f:
|
||||
f.write(screenshot)
|
||||
del screenshot
|
||||
gc.collect()
|
||||
|
||||
async def save_step_html(self, step_n):
|
||||
super().save_step_html(step_n=step_n)
|
||||
content = await self.page.content()
|
||||
await _safe_request_gc(self.page)
|
||||
|
||||
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
|
||||
logger.debug(f"Saving step HTML to {destination}")
|
||||
with open(destination, 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
del content
|
||||
gc.collect()
|
||||
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
current_include_filters=None,
|
||||
empty_pages_are_a_change=False,
|
||||
ignore_status_codes=False,
|
||||
is_binary=False,
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
from playwright.async_api import async_playwright
|
||||
import playwright._impl._errors
|
||||
import time
|
||||
|
||||
self.delete_browser_steps_screenshots()
|
||||
self.watch_uuid = watch_uuid
|
||||
response = None
|
||||
|
||||
async with async_playwright() as p:
|
||||
browser = await self._connect_browser(p)
|
||||
|
||||
ua = manage_user_agent(headers=request_headers) or self.profile_user_agent or None
|
||||
|
||||
context_kwargs = dict(
|
||||
accept_downloads=False,
|
||||
bypass_csp=True,
|
||||
extra_http_headers=request_headers,
|
||||
ignore_https_errors=self.ignore_https_errors,
|
||||
proxy=self.proxy,
|
||||
service_workers=self.service_workers,
|
||||
user_agent=ua,
|
||||
viewport={'width': self.viewport_width, 'height': self.viewport_height},
|
||||
)
|
||||
if self.locale:
|
||||
context_kwargs['locale'] = self.locale
|
||||
|
||||
context = await browser.new_context(**context_kwargs)
|
||||
|
||||
if self.block_images:
|
||||
await context.route(
|
||||
re.compile(r'\.(png|jpe?g|gif|svg|ico|webp|avif|bmp)(\?.*)?$', re.IGNORECASE),
|
||||
lambda route: route.abort()
|
||||
)
|
||||
if self.block_fonts:
|
||||
await context.route(
|
||||
re.compile(r'\.(woff2?|ttf|otf|eot)(\?.*)?$', re.IGNORECASE),
|
||||
lambda route: route.abort()
|
||||
)
|
||||
|
||||
self.page = await context.new_page()
|
||||
self.page.on("console", lambda msg: logger.debug(f"Playwright console: {url} {msg.type}: {msg.text}"))
|
||||
|
||||
from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
|
||||
browsersteps_interface = steppable_browser_interface(start_url=url)
|
||||
browsersteps_interface.page = self.page
|
||||
|
||||
response = await browsersteps_interface.action_goto_url(value=url)
|
||||
|
||||
if response is None:
|
||||
await context.close()
|
||||
await browser.close()
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
|
||||
try:
|
||||
self.headers = await response.all_headers()
|
||||
except TypeError:
|
||||
self.headers = response.all_headers()
|
||||
|
||||
try:
|
||||
if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
|
||||
await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
|
||||
except playwright._impl._errors.TimeoutError:
|
||||
await context.close()
|
||||
await browser.close()
|
||||
pass
|
||||
except Exception as e:
|
||||
await context.close()
|
||||
await browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
extra_wait = self.extra_delay + self.render_extract_delay
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
try:
|
||||
self.status_code = response.status
|
||||
except Exception as e:
|
||||
await context.close()
|
||||
await browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
if fetch_favicon:
|
||||
try:
|
||||
self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
|
||||
await _safe_request_gc(self.page)
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching favicon: {e}")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
try:
|
||||
page_html = await self.page.content()
|
||||
except Exception as e:
|
||||
logger.warning(f"Got non-200 status {self.status_code} but failed to fetch page content: {e}")
|
||||
page_html = None
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot, page_html=page_html)
|
||||
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
await context.close()
|
||||
await browser.close()
|
||||
raise EmptyReply(url=url, status_code=response.status)
|
||||
|
||||
try:
|
||||
if self.browser_steps:
|
||||
try:
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
except BrowserStepsStepException:
|
||||
raise
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
now = time.time()
|
||||
if current_include_filters is not None:
|
||||
await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||
else:
|
||||
await self.page.evaluate("var include_filters=''")
|
||||
await _safe_request_gc(self.page)
|
||||
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
})
|
||||
await _safe_request_gc(self.page)
|
||||
|
||||
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
||||
await _safe_request_gc(self.page)
|
||||
|
||||
self.content = await self.page.content()
|
||||
await _safe_request_gc(self.page)
|
||||
logger.debug(f"Scrape xPath element data done in {time.time() - now:.2f}s")
|
||||
|
||||
self.screenshot = await capture_full_page_async(
|
||||
page=self.page,
|
||||
screenshot_format=self.screenshot_format,
|
||||
watch_uuid=watch_uuid,
|
||||
lock_viewport_elements=self.lock_viewport_elements,
|
||||
)
|
||||
await _safe_request_gc(self.page)
|
||||
gc.collect()
|
||||
|
||||
except ScreenshotUnavailable:
|
||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||
|
||||
finally:
|
||||
for obj, name, close_coro in [
|
||||
(self.page if hasattr(self, 'page') and self.page else None, 'page', lambda: self.page.close() if self.page else asyncio.sleep(0)),
|
||||
(context, 'context', lambda: context.close() if context else asyncio.sleep(0)),
|
||||
(browser, 'browser', lambda: browser.close() if browser else asyncio.sleep(0)),
|
||||
]:
|
||||
try:
|
||||
await asyncio.wait_for(close_coro(), timeout=5.0)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing {name} for {url}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing {name} for {url}: {e}")
|
||||
|
||||
self.page = None
|
||||
context = None
|
||||
browser = None
|
||||
gc.collect()
|
||||
@@ -1,27 +0,0 @@
|
||||
"""
|
||||
Playwright Chrome fetcher — launches a local Chromium browser directly.
|
||||
|
||||
No external browser container is required. Playwright must be installed
|
||||
with Chromium browsers: ``playwright install chromium``.
|
||||
"""
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
|
||||
|
||||
|
||||
class fetcher(PlaywrightBaseFetcher):
|
||||
fetcher_description = "Playwright Chrome (local)"
|
||||
|
||||
async def _connect_browser(self, p):
|
||||
launch_kwargs = {'headless': True}
|
||||
if self.proxy:
|
||||
launch_kwargs['proxy'] = self.proxy
|
||||
return await p.chromium.launch(**launch_kwargs)
|
||||
|
||||
|
||||
class PlaywrightChromePlugin:
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
return ('playwright_chrome', fetcher)
|
||||
|
||||
|
||||
chrome_plugin = PlaywrightChromePlugin()
|
||||
@@ -1,33 +0,0 @@
|
||||
"""
|
||||
Playwright Firefox fetcher — launches a local Firefox browser directly.
|
||||
|
||||
No external browser container is required. Playwright must be installed
|
||||
with Firefox browsers: ``playwright install firefox``.
|
||||
|
||||
Note: ``page.request_gc()`` is Chromium-specific and is silently skipped
|
||||
on Firefox — this is handled transparently by ``_safe_request_gc()`` in
|
||||
the base package.
|
||||
"""
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
|
||||
|
||||
|
||||
class fetcher(PlaywrightBaseFetcher):
|
||||
fetcher_description = "Playwright Firefox (local)"
|
||||
|
||||
status_icon = {'filename': 'firefox-icon.svg', 'alt': 'Using Firefox', 'title': 'Using Firefox'}
|
||||
|
||||
async def _connect_browser(self, p):
|
||||
launch_kwargs = {'headless': True}
|
||||
if self.proxy:
|
||||
launch_kwargs['proxy'] = self.proxy
|
||||
return await p.firefox.launch(**launch_kwargs)
|
||||
|
||||
|
||||
class PlaywrightFirefoxPlugin:
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
return ('playwright_firefox', fetcher)
|
||||
|
||||
|
||||
firefox_plugin = PlaywrightFirefoxPlugin()
|
||||
@@ -1,30 +0,0 @@
|
||||
"""
|
||||
Playwright WebKit fetcher — launches a local WebKit (Safari-engine) browser.
|
||||
|
||||
No external browser container is required. Playwright must be installed
|
||||
with WebKit browsers: ``playwright install webkit``.
|
||||
|
||||
Note: ``page.request_gc()`` is Chromium-specific and is silently skipped
|
||||
on WebKit — handled transparently by ``_safe_request_gc()`` in the base package.
|
||||
"""
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
|
||||
|
||||
|
||||
class fetcher(PlaywrightBaseFetcher):
|
||||
fetcher_description = "Playwright WebKit/Safari (local)"
|
||||
|
||||
async def _connect_browser(self, p):
|
||||
launch_kwargs = {'headless': True}
|
||||
if self.proxy:
|
||||
launch_kwargs['proxy'] = self.proxy
|
||||
return await p.webkit.launch(**launch_kwargs)
|
||||
|
||||
|
||||
class PlaywrightWebKitPlugin:
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
return ('playwright_webkit', fetcher)
|
||||
|
||||
|
||||
webkit_plugin = PlaywrightWebKitPlugin()
|
||||
@@ -7,7 +7,6 @@ from urllib.parse import urlparse
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
|
||||
SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS
|
||||
@@ -76,9 +75,6 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
@@ -90,8 +86,8 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
"""(y) => {
|
||||
const el = document.scrollingElement;
|
||||
if (el) el.scrollTop = y;
|
||||
document.documentElement.scrollTop = y;
|
||||
document.body.scrollTop = y;
|
||||
}""",
|
||||
y
|
||||
)
|
||||
@@ -170,8 +166,11 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
|
||||
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Puppeteer Chromium"
|
||||
requires_connection_url = True
|
||||
fetcher_description = "Puppeteer/direct {}/Javascript".format(
|
||||
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
||||
)
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
|
||||
|
||||
browser = None
|
||||
browser_type = ''
|
||||
@@ -183,10 +182,14 @@ class fetcher(Fetcher):
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}
|
||||
|
||||
def disk_cleanup_after_fetch(self):
|
||||
self.delete_browser_steps_screenshots()
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for Puppeteer fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
@@ -195,10 +198,9 @@ class fetcher(Fetcher):
|
||||
self.browser_connection_is_custom = True
|
||||
self.browser_connection_url = custom_browser_connection_url
|
||||
else:
|
||||
from loguru import logger
|
||||
logger.critical("Puppeteer fetcher has no browser_connection_url — browser profile was not configured. "
|
||||
"Set PLAYWRIGHT_DRIVER_URL or configure a browser profile in Settings.")
|
||||
self.browser_connection_url = None
|
||||
# Fallback to fetching from system
|
||||
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
||||
self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"')
|
||||
|
||||
# allow per-watch proxy selection override
|
||||
# @todo check global too?
|
||||
@@ -268,7 +270,7 @@ class fetcher(Fetcher):
|
||||
import re
|
||||
self.delete_browser_steps_screenshots()
|
||||
|
||||
n = self.extra_delay + self.render_extract_delay
|
||||
n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 12)) + self.render_extract_delay
|
||||
extra_wait = min(n, 15)
|
||||
|
||||
logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
|
||||
@@ -303,8 +305,6 @@ class fetcher(Fetcher):
|
||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||
finally:
|
||||
self.browser = None
|
||||
raise
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
@@ -445,12 +445,8 @@ class fetcher(Fetcher):
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
try:
|
||||
page_html = await self.page.content
|
||||
except Exception as e:
|
||||
logger.warning(f"Got non-200 status {self.status_code} but failed to fetch page content: {e}")
|
||||
page_html = None
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot, page_html=page_html)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
content = await self.page.content
|
||||
|
||||
@@ -536,24 +532,15 @@ class fetcher(Fetcher):
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
finally:
|
||||
# Internal cleanup on any exception/timeout - call quit() immediately
|
||||
# This prevents connection leaks during exception bursts
|
||||
# Worker.py's quit() call becomes a redundant safety net (idempotent)
|
||||
try:
|
||||
await self.quit(watch={'uuid': watch_uuid} if watch_uuid else None)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Error during internal quit() cleanup: {cleanup_error}")
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PuppeteerFetcherPlugin:
|
||||
"""Plugin class that registers the Puppeteer fetcher as a built-in plugin."""
|
||||
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
"""Register the Puppeteer fetcher"""
|
||||
return ('puppeteer', fetcher)
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from loguru import logger
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
@@ -8,8 +7,6 @@ import asyncio
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
|
||||
|
||||
# "html_requests" is listed as the default fetcher in store.py!
|
||||
@@ -82,48 +79,14 @@ class fetcher(Fetcher):
|
||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||
from requests_file import FileAdapter
|
||||
session.mount('file://', FileAdapter())
|
||||
|
||||
allow_iana_restricted = strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false'))
|
||||
|
||||
try:
|
||||
# Fresh DNS check at fetch time — catches DNS rebinding regardless of add-time cache.
|
||||
if not allow_iana_restricted:
|
||||
parsed_initial = urlparse(url)
|
||||
if parsed_initial.hostname and is_private_hostname(parsed_initial.hostname):
|
||||
raise Exception(f"Fetch blocked: '{url}' resolves to a private/reserved IP address. "
|
||||
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow.")
|
||||
|
||||
r = session.request(method=request_method,
|
||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||
url=url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
allow_redirects=False)
|
||||
|
||||
# Manually follow redirects so each hop's resolved IP can be validated,
|
||||
# preventing SSRF via an open redirect on a public host.
|
||||
current_url = url
|
||||
for _ in range(10):
|
||||
if not r.is_redirect:
|
||||
break
|
||||
location = r.headers.get('Location', '')
|
||||
redirect_url = urljoin(current_url, location)
|
||||
if not allow_iana_restricted:
|
||||
parsed_redirect = urlparse(redirect_url)
|
||||
if parsed_redirect.hostname and is_private_hostname(parsed_redirect.hostname):
|
||||
raise Exception(f"Redirect blocked: '{redirect_url}' resolves to a private/reserved IP address.")
|
||||
current_url = redirect_url
|
||||
r = session.request('GET', redirect_url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
allow_redirects=False)
|
||||
else:
|
||||
raise Exception("Too many redirects")
|
||||
|
||||
verify=False)
|
||||
except Exception as e:
|
||||
msg = str(e)
|
||||
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
||||
@@ -149,32 +112,10 @@ class fetcher(Fetcher):
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# No charset in HTTP header - sniff encoding in priority order matching browsers
|
||||
# (WHATWG encoding sniffing algorithm):
|
||||
# 1. BOM - highest confidence, check before anything else
|
||||
# 2. <meta charset> in first 2kb
|
||||
# 3. chardet statistical detection - last resort
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
boms = [
|
||||
(b'\xef\xbb\xbf', 'utf-8-sig'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
|
||||
if bom_encoding:
|
||||
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
|
||||
r.encoding = bom_encoding
|
||||
else:
|
||||
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
|
||||
if meta_charset_match:
|
||||
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
|
||||
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
|
||||
r.encoding = encoding
|
||||
else:
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
@@ -259,10 +200,9 @@ class fetcher(Fetcher):
|
||||
class RequestsFetcherPlugin:
|
||||
"""Plugin class that registers the requests fetcher as a built-in plugin."""
|
||||
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
"""Register the requests fetcher"""
|
||||
return ('requests', fetcher)
|
||||
return ('html_requests', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
|
||||
@@ -38,39 +38,26 @@
|
||||
if (a.size !== b.size) {
|
||||
return b.size - a.size;
|
||||
}
|
||||
|
||||
|
||||
// Second priority: apple-touch-icon over regular icon
|
||||
const isAppleA = /apple-touch-icon/.test(a.rel);
|
||||
const isAppleB = /apple-touch-icon/.test(b.rel);
|
||||
if (isAppleA && !isAppleB) return -1;
|
||||
if (!isAppleA && isAppleB) return 1;
|
||||
|
||||
|
||||
// Third priority: icons with no size attribute (fallback icons) last
|
||||
const hasNoSizeA = !a.hasSizes;
|
||||
const hasNoSizeB = !b.hasSizes;
|
||||
if (hasNoSizeA && !hasNoSizeB) return 1;
|
||||
if (!hasNoSizeA && hasNoSizeB) return -1;
|
||||
|
||||
|
||||
return 0;
|
||||
});
|
||||
|
||||
const timeoutMs = 2000;
|
||||
// 1 MB — matches the server-side limit in bump_favicon()
|
||||
const MAX_BYTES = 1 * 1024 * 1024;
|
||||
|
||||
for (const icon of icons) {
|
||||
try {
|
||||
// Inline data URI — no network fetch needed, data is already here
|
||||
if (icon.href.startsWith('data:')) {
|
||||
const match = icon.href.match(/^data:([^;]+);base64,([A-Za-z0-9+/=]+)$/);
|
||||
if (!match) continue;
|
||||
const mime_type = match[1];
|
||||
const base64 = match[2];
|
||||
// Rough size check: base64 is ~4/3 the binary size
|
||||
if (base64.length * 0.75 > MAX_BYTES) continue;
|
||||
return { url: icon.href, mime_type, base64 };
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
@@ -87,15 +74,12 @@
|
||||
|
||||
const blob = await resp.blob();
|
||||
|
||||
if (blob.size > MAX_BYTES) continue;
|
||||
|
||||
// Convert blob to base64
|
||||
const reader = new FileReader();
|
||||
return await new Promise(resolve => {
|
||||
reader.onloadend = () => {
|
||||
resolve({
|
||||
url: icon.href,
|
||||
mime_type: blob.type,
|
||||
base64: reader.result.split(",")[1]
|
||||
});
|
||||
};
|
||||
@@ -114,3 +98,4 @@
|
||||
// Auto-execute and return result for page.evaluate()
|
||||
return await window.getFaviconAsBlob();
|
||||
})();
|
||||
|
||||
|
||||
@@ -56,10 +56,6 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
|
||||
if total_height > capture_height:
|
||||
stitched = stitched.crop((0, 0, max_width, capture_height))
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
|
||||
@@ -3,13 +3,13 @@ import time
|
||||
|
||||
from loguru import logger
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.content_fetchers.exceptions import Non200ErrorCodeReceived
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Selenium WebDriver Chrome"
|
||||
requires_connection_url = True
|
||||
if os.getenv("WEBDRIVER_URL"):
|
||||
fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\""
|
||||
else:
|
||||
fetcher_description = "WebDriver Chrome/Javascript"
|
||||
|
||||
proxy = None
|
||||
proxy_url = None
|
||||
@@ -19,21 +19,26 @@ class fetcher(Fetcher):
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for WebDriver fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
from urllib.parse import urlparse
|
||||
from selenium.webdriver.common.proxy import Proxy
|
||||
|
||||
if custom_browser_connection_url:
|
||||
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
||||
if not custom_browser_connection_url:
|
||||
self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
|
||||
else:
|
||||
self.browser_connection_is_custom = True
|
||||
self.browser_connection_url = custom_browser_connection_url
|
||||
else:
|
||||
from loguru import logger
|
||||
logger.critical("Selenium WebDriver fetcher has no browser_connection_url — browser profile was not configured. "
|
||||
"Set WEBDRIVER_URL or configure a browser profile in Settings.")
|
||||
self.browser_connection_url = None
|
||||
|
||||
##### PROXY SETUP #####
|
||||
|
||||
@@ -99,17 +104,15 @@ class fetcher(Fetcher):
|
||||
|
||||
from selenium.webdriver.remote.remote_connection import RemoteConnection
|
||||
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
|
||||
from selenium.webdriver.remote.client_config import ClientConfig
|
||||
from urllib3.util import Timeout
|
||||
driver = None
|
||||
try:
|
||||
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
|
||||
client_config = ClientConfig(
|
||||
remote_server_addr=self.browser_connection_url,
|
||||
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
|
||||
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
|
||||
remote_connection = RemoteConnection(
|
||||
self.browser_connection_url,
|
||||
)
|
||||
remote_connection = RemoteConnection(client_config=client_config)
|
||||
remote_connection.set_timeout(30) # seconds
|
||||
|
||||
# Now create the driver with the RemoteConnection
|
||||
driver = RemoteWebDriver(
|
||||
command_executor=remote_connection,
|
||||
options=options
|
||||
@@ -127,28 +130,22 @@ class fetcher(Fetcher):
|
||||
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
|
||||
driver.set_window_size(1280, 1024)
|
||||
|
||||
driver.implicitly_wait(self.extra_delay)
|
||||
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||
|
||||
if self.webdriver_js_execute_code is not None:
|
||||
driver.execute_script(self.webdriver_js_execute_code)
|
||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
||||
driver.implicitly_wait(self.extra_delay)
|
||||
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||
|
||||
# @todo - how to check this? is it possible?
|
||||
self.status_code = 200
|
||||
# @todo somehow we should try to get this working for WebDriver
|
||||
# raise EmptyReply(url=url, status_code=r.status_code)
|
||||
|
||||
# @todo - dom wait loaded?
|
||||
import time
|
||||
time.sleep(self.extra_delay + self.render_extract_delay)
|
||||
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
||||
self.content = driver.page_source
|
||||
|
||||
# Use Navigation Timing API to get the real HTTP status code (Chrome 102+)
|
||||
# Read after the sleep so the page is fully settled
|
||||
try:
|
||||
nav_status = driver.execute_script(
|
||||
"return window.performance.getEntriesByType('navigation')[0]?.responseStatus"
|
||||
)
|
||||
# Guard against 0 (file://, blocked requests) which should not raise Non200
|
||||
self.status_code = int(nav_status) if nav_status and int(nav_status) > 0 else 200
|
||||
except Exception:
|
||||
self.status_code = 200
|
||||
self.headers = {}
|
||||
|
||||
# Selenium always captures as PNG, convert to JPEG if needed
|
||||
@@ -178,10 +175,6 @@ class fetcher(Fetcher):
|
||||
img.close()
|
||||
else:
|
||||
self.screenshot = screenshot_png
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=self.screenshot, page_html=self.content)
|
||||
|
||||
except Exception as e:
|
||||
driver.quit()
|
||||
raise e
|
||||
@@ -197,10 +190,9 @@ class fetcher(Fetcher):
|
||||
class WebDriverSeleniumFetcherPlugin:
|
||||
"""Plugin class that registers the WebDriver Selenium fetcher as a built-in plugin."""
|
||||
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
"""Register the WebDriver Selenium fetcher"""
|
||||
return ('selenium', fetcher)
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
|
||||
@@ -45,38 +45,8 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
# Compiled regex patterns for performance
|
||||
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
# Regexes built from the constants above — no brittle hardcoded strings
|
||||
_EXTRACT_REMOVED_RE = re.compile(
|
||||
re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
|
||||
)
|
||||
_EXTRACT_ADDED_RE = re.compile(
|
||||
re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
|
||||
)
|
||||
|
||||
|
||||
def extract_changed_from(raw_diff: str) -> str:
|
||||
"""Extract only the removed/changed-from fragments from a raw diff string.
|
||||
|
||||
Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
|
||||
not the full surrounding line. Multiple fragments joined with newlines.
|
||||
"""
|
||||
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_REMOVED_RE.finditer(raw_diff))
|
||||
|
||||
|
||||
def extract_changed_to(raw_diff: str) -> str:
|
||||
"""Extract only the added/changed-into fragments from a raw diff string.
|
||||
|
||||
Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
|
||||
not the full surrounding line. Multiple fragments joined with newlines.
|
||||
"""
|
||||
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_ADDED_RE.finditer(raw_diff))
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html', include_change_type_prefix: bool = True) -> tuple[str, bool]:
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
|
||||
"""
|
||||
Render word-level differences between two lines inline using diff-match-patch library.
|
||||
|
||||
@@ -163,20 +133,14 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
|
||||
if removed_tokens:
|
||||
removed_full = ''.join(removed_tokens).rstrip()
|
||||
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
|
||||
if include_change_type_prefix:
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
else:
|
||||
result_parts.append(f'{removed_full}{trailing_removed}')
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
|
||||
if added_tokens:
|
||||
if result_parts: # Add newline between removed and added
|
||||
result_parts.append('\n')
|
||||
added_full = ''.join(added_tokens).rstrip()
|
||||
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
|
||||
if include_change_type_prefix:
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
else:
|
||||
result_parts.append(f'{added_full}{trailing_added}')
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
else:
|
||||
@@ -186,27 +150,21 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
|
||||
if op == 0: # Equal
|
||||
result_parts.append(text)
|
||||
elif op == 1: # Insertion
|
||||
if not include_change_type_prefix:
|
||||
result_parts.append(text)
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
result_parts.append(trailing)
|
||||
elif op == -1: # Deletion
|
||||
if not include_change_type_prefix:
|
||||
result_parts.append(text)
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
result_parts.append(trailing)
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
|
||||
@@ -402,7 +360,7 @@ def customSequenceMatcher(
|
||||
|
||||
# Use inline word-level diff for single line replacements when word_diff is enabled
|
||||
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer, include_change_type_prefix=include_change_type_prefix)
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
|
||||
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
|
||||
if ignore_junk and not has_changes:
|
||||
# No real changes, skip this line
|
||||
|
||||
@@ -4,7 +4,6 @@ import flask_login
|
||||
import locale
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@@ -28,6 +27,7 @@ from flask import (
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
from flask_restful import abort, Api
|
||||
from flask_cors import CORS
|
||||
|
||||
@@ -40,7 +40,7 @@ from loguru import logger
|
||||
|
||||
from changedetectionio import __version__
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
@@ -69,43 +69,19 @@ socketio_server = None
|
||||
|
||||
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
||||
CORS(app)
|
||||
from werkzeug.routing import BaseConverter, ValidationError
|
||||
from uuid import UUID
|
||||
|
||||
class StrictUUIDConverter(BaseConverter):
|
||||
# Special sentinel values allowed in addition to strict UUIDs
|
||||
_ALLOWED_SENTINELS = frozenset({'first'})
|
||||
|
||||
def to_python(self, value: str) -> str:
|
||||
if value in self._ALLOWED_SENTINELS:
|
||||
return value
|
||||
try:
|
||||
u = UUID(value)
|
||||
except ValueError as e:
|
||||
raise ValidationError() from e
|
||||
# Reject non-standard formats (braces, URNs, no-hyphens)
|
||||
if str(u) != value.lower():
|
||||
raise ValidationError()
|
||||
return str(u)
|
||||
|
||||
def to_url(self, value) -> str:
|
||||
return str(value)
|
||||
|
||||
# app setup (once)
|
||||
app.url_map.converters["uuid_str"] = StrictUUIDConverter
|
||||
|
||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak.
|
||||
# There's also a bug between flask compress and socketio that causes some kind of slow memory leak
|
||||
# It's better to use compression on your reverse proxy (nginx etc) instead.
|
||||
if strtobool(os.getenv("FLASK_ENABLE_COMPRESSION")):
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||
compress = FlaskCompress()
|
||||
compress.init_app(app)
|
||||
|
||||
compress = FlaskCompress()
|
||||
|
||||
compress.init_app(app)
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||
|
||||
|
||||
@@ -212,23 +188,14 @@ def _is_safe_valid_url(test_url):
|
||||
from .validate_url import is_safe_valid_url
|
||||
return is_safe_valid_url(test_url)
|
||||
|
||||
@app.template_global('get_html_head_extras')
|
||||
def _get_html_head_extras():
|
||||
from .pluggy_interface import collect_html_head_extras
|
||||
return collect_html_head_extras()
|
||||
|
||||
|
||||
@app.template_filter('format_number_locale')
|
||||
def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||
# Format the number with two decimal places (locale format string will return 6 decimal)
|
||||
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
||||
return formatted_value
|
||||
|
||||
@app.template_filter('regex_search')
|
||||
def _jinja2_filter_regex_search(value, pattern):
|
||||
import re
|
||||
return re.search(pattern, str(value)) is not None
|
||||
return formatted_value
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
@@ -346,38 +313,52 @@ def _jinja2_filter_format_duration(seconds):
|
||||
|
||||
@app.template_filter('fetcher_status_icons')
|
||||
def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
"""Return status icon HTML for a fetcher, or empty string if none.
|
||||
"""Get status icon HTML for a given fetcher.
|
||||
|
||||
Built-in fetchers declare their icon via the ``status_icon`` class attribute
|
||||
on their ``Fetcher`` subclass. Plugin fetchers may still use the pluggy
|
||||
``collect_fetcher_status_icons`` hook as a fallback.
|
||||
This filter checks both built-in fetchers and plugin fetchers for status icons.
|
||||
|
||||
Args:
|
||||
fetcher_name: The fetcher name (e.g., 'html_webdriver', 'html_js_zyte')
|
||||
|
||||
Returns:
|
||||
str: HTML string containing status icon elements
|
||||
"""
|
||||
from changedetectionio import content_fetchers
|
||||
from changedetectionio.pluggy_interface import collect_fetcher_status_icons
|
||||
from markupsafe import Markup
|
||||
from flask import url_for
|
||||
|
||||
icon_data = None
|
||||
|
||||
fetcher_class = content_fetchers.get_fetcher(fetcher_name)
|
||||
if fetcher_class is not None:
|
||||
icon_data = getattr(fetcher_class, 'status_icon', None)
|
||||
if not icon_data and callable(getattr(fetcher_class, 'get_status_icon_data', None)):
|
||||
# First check if it's a plugin fetcher (plugins have priority)
|
||||
plugin_icon_data = collect_fetcher_status_icons(fetcher_name)
|
||||
if plugin_icon_data:
|
||||
icon_data = plugin_icon_data
|
||||
# Check if it's a built-in fetcher
|
||||
elif hasattr(content_fetchers, fetcher_name):
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name)
|
||||
if hasattr(fetcher_class, 'get_status_icon_data'):
|
||||
icon_data = fetcher_class.get_status_icon_data()
|
||||
|
||||
# Fallback: pluggy hook for plugins that implement fetcher_status_icon
|
||||
if not icon_data:
|
||||
from changedetectionio.pluggy_interface import collect_fetcher_status_icons
|
||||
icon_data = collect_fetcher_status_icons(fetcher_name)
|
||||
# Build HTML from icon data
|
||||
if icon_data and isinstance(icon_data, dict):
|
||||
# Use 'group' from icon_data if specified, otherwise default to 'images'
|
||||
group = icon_data.get('group', 'images')
|
||||
|
||||
if not icon_data:
|
||||
return ''
|
||||
# Try to use url_for, but fall back to manual URL building if endpoint not registered yet
|
||||
try:
|
||||
icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
|
||||
except:
|
||||
# Fallback: build URL manually respecting APPLICATION_ROOT
|
||||
from flask import request
|
||||
app_root = request.script_root if hasattr(request, 'script_root') else ''
|
||||
icon_url = f"{app_root}/static/{group}/{icon_data['filename']}"
|
||||
|
||||
group = icon_data.get('group', 'images')
|
||||
icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
|
||||
style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
|
||||
return Markup(f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>')
|
||||
style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
|
||||
html = f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>'
|
||||
return Markup(html)
|
||||
|
||||
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||
return ''
|
||||
|
||||
@app.template_filter('sanitize_tag_class')
|
||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
@@ -390,8 +371,9 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
Returns:
|
||||
str: A sanitized string suitable for use as a CSS class name
|
||||
"""
|
||||
import re
|
||||
# Remove all non-alphanumeric characters and convert to lowercase
|
||||
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
||||
# Ensure it starts with a letter (CSS requirement)
|
||||
if sanitized and not sanitized[0].isalpha():
|
||||
sanitized = 'tag' + sanitized
|
||||
@@ -479,21 +461,28 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
_locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||
|
||||
def get_locale():
|
||||
# Locale aliases: map browser language codes to translation directory names
|
||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
||||
locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
return session['locale']
|
||||
|
||||
# 2. Fall back to Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||
# 3. Map browser locale to our internal locale if needed
|
||||
return _locale_aliases.get(browser_locale, browser_locale)
|
||||
# Get the best match from browser's Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
||||
|
||||
# 3. Check if we need to map the browser locale to our internal locale
|
||||
if browser_locale in locale_aliases:
|
||||
return locale_aliases[browser_locale]
|
||||
|
||||
return browser_locale
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -545,22 +534,22 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
|
||||
watch_api.add_resource(WatchHistoryDiff,
|
||||
'/api/v1/watch/<uuid_str:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchSingleHistory,
|
||||
'/api/v1/watch/<uuid_str:uuid>/history/<string:timestamp>',
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
watch_api.add_resource(WatchFavicon,
|
||||
'/api/v1/watch/<uuid_str:uuid>/favicon',
|
||||
'/api/v1/watch/<string:uuid>/favicon',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchHistory,
|
||||
'/api/v1/watch/<uuid_str:uuid>/history',
|
||||
'/api/v1/watch/<string:uuid>/history',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(CreateWatch, '/api/v1/watch',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Watch, '/api/v1/watch/<uuid_str:uuid>',
|
||||
watch_api.add_resource(Watch, '/api/v1/watch/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(SystemInfo, '/api/v1/systeminfo',
|
||||
@@ -573,7 +562,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Tags, '/api/v1/tags',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<uuid_str:uuid>',
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Search, '/api/v1/search',
|
||||
@@ -582,8 +571,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Notifications, '/api/v1/notifications',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Spec, '/api/v1/full-spec')
|
||||
|
||||
@login_manager.user_loader
|
||||
def user_loader(email):
|
||||
user = User()
|
||||
@@ -1006,16 +993,15 @@ def check_for_new_version():
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
session = requests.Session()
|
||||
session.verify = False
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
r = session.post("https://changedetection.io/check-ver.php",
|
||||
r = requests.post("https://changedetection.io/check-ver.php",
|
||||
data={'version': __version__,
|
||||
'app_guid': datastore.data['app_guid'],
|
||||
'watch_count': len(datastore.data['watching'])
|
||||
})
|
||||
},
|
||||
|
||||
verify=False)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
+14
-69
@@ -608,12 +608,13 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
from changedetectionio.html_tools import SafeXPath3Parser
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = line.replace('xpath:', '')
|
||||
|
||||
try:
|
||||
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||
except elementpath.ElementPathError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
@@ -667,11 +668,9 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
# `jq` requires full compilation in windows and so isn't generally available
|
||||
raise ValidationError("jq not support not found")
|
||||
|
||||
from changedetectionio.html_tools import validate_jq_expression
|
||||
input = line.replace('jq:', '')
|
||||
|
||||
try:
|
||||
validate_jq_expression(input)
|
||||
jq.compile(input)
|
||||
except (ValueError) as e:
|
||||
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
|
||||
@@ -725,7 +724,7 @@ class ValidateStartsWithRegex(object):
|
||||
raise ValidationError(self.message or _l("Invalid value."))
|
||||
|
||||
class quickWatchForm(Form):
|
||||
url = StringField(_l('URL'), validators=[validateURL()])
|
||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
@@ -742,6 +741,7 @@ class commonSettingsForm(Form):
|
||||
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
|
||||
fetch_backend = RadioField(_l('Fetch Method'), choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
notification_body = TextAreaField(_l('Notification Body'), default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
@@ -778,8 +778,7 @@ class SingleBrowserStep(Form):
|
||||
|
||||
class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
browser_profile = RadioField(_l('Browser / Fetch method'), choices=[]) # populated at runtime in edit.py
|
||||
url = StringField('Web Page URL', validators=[validateURL()])
|
||||
url = fields.URLField('Web Page URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group Tag', [validators.Optional()], default='')
|
||||
|
||||
time_between_check = EnhancedFormField(
|
||||
@@ -798,7 +797,6 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
|
||||
extract_lines_containing = StringListField(_l('Extract lines containing'), [validators.Optional()])
|
||||
extract_text = StringListField(_l('Extract text'), [ValidateListRegex()])
|
||||
|
||||
title = StringField(_l('Title'), default='')
|
||||
@@ -941,66 +939,10 @@ class SingleExtraBrowser(Form):
|
||||
ValidateSimpleURL()
|
||||
], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
|
||||
|
||||
|
||||
class BrowserProfileForm(Form):
|
||||
"""Create or edit a named BrowserProfile stored in settings.application.browser_profiles."""
|
||||
|
||||
name = StringField(
|
||||
_l('Profile name'),
|
||||
[validators.DataRequired(), validators.Length(max=100)],
|
||||
render_kw={"placeholder": _l("e.g. Mobile Chrome, Bright Data CDP"), "maxlength": "100"}
|
||||
)
|
||||
fetch_backend = SelectField(
|
||||
_l('Fetch method'),
|
||||
choices=[], # populated at runtime from available_fetchers()
|
||||
)
|
||||
browser_connection_url = StringField(
|
||||
_l('Browser connection URL'),
|
||||
[
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
regex=r'^(wss?|ws|http|https)://',
|
||||
flags=re.IGNORECASE,
|
||||
message=_l('Browser connection URL must start with ws://, wss://, http://, https://')
|
||||
),
|
||||
ValidateSimpleURL(),
|
||||
],
|
||||
render_kw={"placeholder": "ws://my-chrome:3000", "size": 50}
|
||||
)
|
||||
viewport_width = IntegerField(
|
||||
_l('Viewport width (px)'),
|
||||
[validators.Optional(), validators.NumberRange(min=100, max=7680)],
|
||||
default=1280,
|
||||
render_kw={"style": "width:5em;"}
|
||||
)
|
||||
viewport_height = IntegerField(
|
||||
_l('Viewport height (px)'),
|
||||
[validators.Optional(), validators.NumberRange(min=100, max=4320)],
|
||||
default=1000,
|
||||
render_kw={"style": "width:5em;"}
|
||||
)
|
||||
block_images = BooleanField(_l('Block images (faster loads)'), default=False)
|
||||
block_fonts = BooleanField(_l('Block web fonts'), default=False)
|
||||
ignore_https_errors = BooleanField(_l('Ignore HTTPS/TLS errors'), default=False)
|
||||
user_agent = StringField(
|
||||
_l('User-Agent override'),
|
||||
[validators.Optional(), validators.Length(max=500)],
|
||||
render_kw={"placeholder": _l("Leave blank to use fetcher default"), "size": 60}
|
||||
)
|
||||
locale = StringField(
|
||||
_l('Locale'),
|
||||
[validators.Optional(), validators.Length(max=20)],
|
||||
render_kw={"placeholder": "en-US, de-DE, fr-FR …", "size": 15}
|
||||
)
|
||||
custom_headers = TextAreaField(
|
||||
_l('Custom headers'),
|
||||
[validators.Optional()],
|
||||
render_kw={
|
||||
"placeholder": "Header-Name: value\nAnother-Header: value",
|
||||
"rows": 4, "cols": 60,
|
||||
"style": "font-family:monospace;"
|
||||
}
|
||||
)
|
||||
class DefaultUAInputForm(Form):
|
||||
html_requests = StringField(_l('Plaintext requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"):
|
||||
html_webdriver = StringField(_l('Chrome requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
|
||||
# datastore.data['settings']['requests']..
|
||||
class globalSettingsRequestForm(Form):
|
||||
@@ -1024,6 +966,8 @@ class globalSettingsRequestForm(Form):
|
||||
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
|
||||
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
|
||||
|
||||
default_ua = FormField(DefaultUAInputForm, label=_l("Default User-Agent overrides"))
|
||||
|
||||
def validate_extra_proxies(self, extra_validators=None):
|
||||
for e in self.data['extra_proxies']:
|
||||
if e.get('proxy_name') or e.get('proxy_url'):
|
||||
@@ -1046,6 +990,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
|
||||
)
|
||||
empty_pages_are_a_change = BooleanField(_l('Treat empty pages as a change?'), default=False)
|
||||
fetch_backend = RadioField(_l('Fetch Method'), default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
global_ignore_text = StringListField(_l('Ignore Text'), [ValidateListRegex()])
|
||||
global_subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
ignore_whitespace = BooleanField(_l('Ignore whitespace'))
|
||||
@@ -1061,7 +1006,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
|
||||
)
|
||||
|
||||
password = SaltyPasswordField(_l('Password'), render_kw={"autocomplete": "new-password"})
|
||||
password = SaltyPasswordField(_l('Password'))
|
||||
pager_size = IntegerField(_l('Pager size'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
|
||||
+13
-146
@@ -4,7 +4,6 @@ from loguru import logger
|
||||
from typing import List
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
||||
@@ -14,45 +13,6 @@ PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
|
||||
|
||||
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
|
||||
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# jq builtins that can leak sensitive data or cause harm when user-supplied expressions are executed.
|
||||
# env/$ENV reads all process environment variables (passwords, API keys, etc.)
|
||||
# include/import can read arbitrary files from disk
|
||||
# input/inputs reads beyond the supplied JSON data
|
||||
# debug/stderr leaks data to stderr
|
||||
# halt/halt_error terminates the process (DoS)
|
||||
_JQ_BLOCKED_PATTERNS = [
|
||||
(re.compile(r'\benv\b'), 'env (reads environment variables)'),
|
||||
(re.compile(r'\$ENV\b'), '$ENV (reads environment variables)'),
|
||||
(re.compile(r'\binclude\b'), 'include (reads files from disk)'),
|
||||
(re.compile(r'\bimport\b'), 'import (reads files from disk)'),
|
||||
(re.compile(r'\binputs?\b'), 'input/inputs (reads beyond provided data)'),
|
||||
(re.compile(r'\bdebug\b'), 'debug (leaks data to stderr)'),
|
||||
(re.compile(r'\bstderr\b'), 'stderr (leaks data to stderr)'),
|
||||
(re.compile(r'\bhalt(?:_error)?\b'), 'halt/halt_error (terminates the process)'),
|
||||
(re.compile(r'\$__loc__\b'), '$__loc__ (leaks file path information)'),
|
||||
(re.compile(r'\bbuiltins\b'), 'builtins (enumerates available functions)'),
|
||||
(re.compile(r'\bmodulemeta\b'), 'modulemeta (leaks module information)'),
|
||||
(re.compile(r'\$JQ_BUILD_CONFIGURATION\b'), '$JQ_BUILD_CONFIGURATION (leaks build information)'),
|
||||
]
|
||||
|
||||
def validate_jq_expression(expression: str) -> None:
|
||||
"""Raise ValueError if the jq expression uses any dangerous builtin.
|
||||
|
||||
User-supplied jq expressions are executed server-side. Without this check,
|
||||
builtins like `env` expose every process environment variable (SALTED_PASS,
|
||||
proxy credentials, API keys, etc.) as watch output.
|
||||
"""
|
||||
from changedetectionio.strtobool import strtobool
|
||||
if strtobool(os.getenv('JQ_ALLOW_RISKY_EXPRESSIONS', 'false')):
|
||||
return
|
||||
|
||||
for pattern, description in _JQ_BLOCKED_PATTERNS:
|
||||
if pattern.search(expression):
|
||||
msg = f"jq expression uses disallowed builtin: {description}"
|
||||
logger.critical(f"Security: blocked jq expression containing '{description}' - expression: {expression!r}")
|
||||
raise ValueError(msg)
|
||||
|
||||
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
||||
@@ -63,59 +23,6 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
|
||||
'unparsed-text',
|
||||
'unparsed-text-lines',
|
||||
'unparsed-text-available',
|
||||
'doc',
|
||||
'doc-available',
|
||||
'json-doc',
|
||||
'json-doc-available',
|
||||
'collection', # XPath 2.0+: loads XML node collections from arbitrary URIs
|
||||
'uri-collection', # XPath 3.0+: enumerates URIs from resource collections
|
||||
'transform', # XPath 3.1: XSLT transformation (currently raises, block proactively)
|
||||
'load-xquery-module', # XPath 3.1: loads XQuery modules (currently raises, block proactively)
|
||||
'environment-variable',
|
||||
'available-environment-variables',
|
||||
]
|
||||
|
||||
|
||||
def _build_safe_xpath3_parser():
|
||||
"""Return an XPath3Parser subclass with filesystem/environment access functions removed.
|
||||
|
||||
XPath 3.0 includes functions that can read arbitrary files or environment variables:
|
||||
- unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
|
||||
- doc / doc-available (XML fetch from URI)
|
||||
- environment-variable / available-environment-variables (env var leakage)
|
||||
|
||||
Subclassing gives us an independent symbol_table copy (not shared with the parent class),
|
||||
so removing entries here does not affect XPath3Parser itself.
|
||||
|
||||
Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
|
||||
(comma-separated, e.g. "unparsed-text,doc,environment-variable").
|
||||
"""
|
||||
import os
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
class SafeXPath3Parser(XPath3Parser):
|
||||
pass
|
||||
|
||||
env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
|
||||
if env_override is not None:
|
||||
blocked = [f.strip() for f in env_override.split(',') if f.strip()]
|
||||
else:
|
||||
blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
|
||||
|
||||
for _fn in blocked:
|
||||
SafeXPath3Parser.symbol_table.pop(_fn, None)
|
||||
|
||||
return SafeXPath3Parser
|
||||
|
||||
|
||||
# Module-level singleton — built once, reused everywhere.
|
||||
SafeXPath3Parser = _build_safe_xpath3_parser()
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
@@ -276,6 +183,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
tree = None
|
||||
@@ -301,7 +210,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
@@ -326,9 +235,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
# Drop element references before the finally block so tree.clear() can release
|
||||
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
||||
del r
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
@@ -424,16 +330,12 @@ def _parse_json(json_data, json_filter):
|
||||
raise Exception("jq not support not found")
|
||||
|
||||
if json_filter.startswith("jq:"):
|
||||
expr = json_filter.removeprefix("jq:")
|
||||
validate_jq_expression(expr)
|
||||
jq_expression = jq.compile(expr)
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jq:"))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
if json_filter.startswith("jqraw:"):
|
||||
expr = json_filter.removeprefix("jqraw:")
|
||||
validate_jq_expression(expr)
|
||||
jq_expression = jq.compile(expr)
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return '\n'.join(str(item) for item in match)
|
||||
|
||||
@@ -537,25 +439,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||
else:
|
||||
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
||||
# Server may claim application/json but actually return JSONP
|
||||
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
||||
if jsonp_match:
|
||||
try:
|
||||
inner = jsonp_match.group(1).strip()
|
||||
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
||||
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter )
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
@@ -671,33 +561,10 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
|
||||
)
|
||||
else:
|
||||
parser_config = None
|
||||
|
||||
if is_rss:
|
||||
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
||||
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
||||
else:
|
||||
# Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into <head>,
|
||||
# causing inscriptis to silently give up. Regex-based stripping is unsafe because tags
|
||||
# can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>),
|
||||
# causing the regex to scan past the intended close and eat real page content.
|
||||
from bs4 import BeautifulSoup
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
# Strip tags that inscriptis cannot render as meaningful text and which can be very large.
|
||||
# svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers.
|
||||
# video/audio/picture are kept — they may contain meaningful fallback text or captions.
|
||||
for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg',
|
||||
'math', 'canvas', 'iframe', 'template']):
|
||||
tag.decompose()
|
||||
|
||||
# SPAs often use <body style="display:none"> to hide content until JS loads.
|
||||
# inscriptis respects CSS display rules, so strip hiding styles from the body tag.
|
||||
body_tag = soup.find('body')
|
||||
if body_tag and body_tag.get('style'):
|
||||
style = body_tag['style']
|
||||
if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE):
|
||||
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')")
|
||||
del body_tag['style']
|
||||
|
||||
html_content = str(soup)
|
||||
|
||||
text_content = get_text(html_content, config=parser_config)
|
||||
return text_content
|
||||
|
||||
@@ -28,20 +28,17 @@ def get_timeago_locale(flask_locale):
|
||||
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
|
||||
"""
|
||||
locale_map = {
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
# timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'pt_BR': 'pt_BR', # Portuguese (Brasil)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'ja': 'ja', # Japanese
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
}
|
||||
return locale_map.get(flask_locale, flask_locale)
|
||||
|
||||
@@ -55,8 +52,7 @@ LANGUAGE_DATA = {
|
||||
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
|
||||
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
|
||||
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'},
|
||||
'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
@@ -71,7 +67,6 @@ LANGUAGE_DATA = {
|
||||
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
||||
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
||||
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
||||
'uk': {'flag': 'fi fi-ua fis', 'name': 'Українська'},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ from os import getenv
|
||||
from copy import deepcopy
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
|
||||
from changedetectionio.model.Tags import TagsDict
|
||||
|
||||
from changedetectionio.notification import (
|
||||
default_notification_body,
|
||||
@@ -12,6 +11,7 @@ from changedetectionio.notification import (
|
||||
|
||||
# Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification
|
||||
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
|
||||
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
|
||||
|
||||
|
||||
|
||||
@@ -30,6 +30,10 @@ class model(dict):
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")), # Number of threads, lower is better for slow connections
|
||||
'default_ua': {
|
||||
'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
|
||||
'html_webdriver': None,
|
||||
}
|
||||
},
|
||||
'application': {
|
||||
# Custom notification content
|
||||
@@ -38,9 +42,7 @@ class model(dict):
|
||||
'api_access_token_enabled': True,
|
||||
'base_url' : None,
|
||||
'empty_pages_are_a_change': False,
|
||||
'browser_profile': None, # machine-name of the system-default BrowserProfile
|
||||
'browser_profiles': {}, # user-defined profiles keyed by machine name
|
||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "requests"),
|
||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'global_subtractive_selectors': [],
|
||||
@@ -66,7 +68,7 @@ class model(dict):
|
||||
'schema_version' : 0,
|
||||
'shared_diff_access': False,
|
||||
'strip_ignored_lines': False,
|
||||
'tags': None, # Initialized in __init__ with real datastore_path
|
||||
'tags': {}, #@todo use Tag.model initialisers
|
||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||
'ui': {
|
||||
'use_page_title_in_list': True,
|
||||
@@ -78,16 +80,10 @@ class model(dict):
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, *arg, datastore_path=None, **kw):
|
||||
def __init__(self, *arg, **kw):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
# Capture any tags data passed in before base_config overwrites the structure
|
||||
existing_tags = self.get('settings', {}).get('application', {}).get('tags') or {}
|
||||
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
|
||||
self.update(deepcopy(self.base_config))
|
||||
# TagsDict requires the real datastore_path at runtime (cannot be set at class-definition time)
|
||||
if datastore_path is None:
|
||||
raise ValueError("App.model() requires 'datastore_path' keyword argument")
|
||||
self['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=datastore_path)
|
||||
|
||||
|
||||
def parse_headers_from_text_file(filepath):
|
||||
|
||||
@@ -46,26 +46,11 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
def matches_url(self, url: str) -> bool:
|
||||
"""Return True if this tag should be auto-applied to the given watch URL.
|
||||
|
||||
Wildcard patterns (*,?,[ ) use fnmatch; anything else is a case-insensitive
|
||||
substring match. Returns False if no pattern is configured.
|
||||
"""
|
||||
import fnmatch
|
||||
pattern = self.get('url_match_pattern', '').strip()
|
||||
if not pattern or not url:
|
||||
return False
|
||||
if any(c in pattern for c in ('*', '?', '[')):
|
||||
return fnmatch.fnmatch(url.lower(), pattern.lower())
|
||||
return pattern.lower() in url.lower()
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() and _get_commit_data() methods inherited from watch_base
|
||||
# Tag uses default _get_commit_data() (includes all keys)
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
_SENTINEL = object()
|
||||
|
||||
|
||||
class TagsDict(dict):
|
||||
"""Dict subclass that removes the corresponding tag.json file when a tag is deleted."""
|
||||
|
||||
def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
|
||||
self._datastore_path = Path(datastore_path)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def __delitem__(self, key: str) -> None:
|
||||
super().__delitem__(key)
|
||||
tag_dir = self._datastore_path / key
|
||||
tag_json_file = tag_dir / "tag.json"
|
||||
if not os.path.exists(tag_json_file):
|
||||
logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")
|
||||
return
|
||||
try:
|
||||
shutil.rmtree(tag_dir)
|
||||
logger.info(f"Deleted tag directory for tag {key!r}")
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")
|
||||
|
||||
def pop(self, key: str, default=_SENTINEL):
|
||||
"""Remove and return tag, deleting its tag.json file. Raises KeyError if missing and no default given."""
|
||||
if key in self:
|
||||
value = self[key]
|
||||
del self[key]
|
||||
return value
|
||||
if default is _SENTINEL:
|
||||
raise KeyError(key)
|
||||
return default
|
||||
@@ -43,11 +43,6 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||
_FAVICON_FILENAME_CACHE: dict = {}
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
@@ -353,40 +348,40 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def is_source_type_url(self):
|
||||
return self.get('url', '').startswith('source:')
|
||||
|
||||
@property
|
||||
def effective_browser_profile(self):
|
||||
"""Resolve the effective BrowserProfile for this watch.
|
||||
|
||||
Walks the chain: watch → tag (overrides_watch=True) → global settings → built-in fallback.
|
||||
Never raises. Returns a BrowserProfile instance.
|
||||
"""
|
||||
from changedetectionio.model.browser_profile import resolve_browser_profile, BUILTIN_REQUESTS
|
||||
if not self._datastore:
|
||||
return BUILTIN_REQUESTS
|
||||
try:
|
||||
return resolve_browser_profile(self, self._datastore)
|
||||
except Exception:
|
||||
return BUILTIN_REQUESTS
|
||||
|
||||
@property
|
||||
def get_fetch_backend(self):
|
||||
"""Legacy property — prefer effective_browser_profile.fetch_backend for new code.
|
||||
|
||||
Returns the raw fetch_backend stored on this watch (or 'requests' for PDFs).
|
||||
Does NOT walk the tag/global resolution chain.
|
||||
"""
|
||||
if self.is_pdf:
|
||||
return 'requests'
|
||||
return self.get('fetch_backend')
|
||||
Get the fetch backend for this watch with special case handling.
|
||||
|
||||
@property
|
||||
def fetcher_supports_screenshots(self):
|
||||
"""Return True if the resolved fetcher for this watch supports screenshots."""
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_class = content_fetchers.get_fetcher(self.effective_browser_profile.fetch_backend)
|
||||
if fetcher_class is None:
|
||||
return False
|
||||
return bool(getattr(fetcher_class, 'supports_screenshots', False))
|
||||
CHAIN RESOLUTION OPPORTUNITY:
|
||||
Currently returns watch.fetch_backend directly, but doesn't implement
|
||||
Watch → Tag → Global resolution chain. With Pydantic:
|
||||
|
||||
@computed_field
|
||||
def resolved_fetch_backend(self) -> str:
|
||||
# Special case: PDFs always use html_requests
|
||||
if self.is_pdf:
|
||||
return 'html_requests'
|
||||
|
||||
# Watch override
|
||||
if self.fetch_backend and self.fetch_backend != 'system':
|
||||
return self.fetch_backend
|
||||
|
||||
# Tag override (first tag with overrides_watch=True wins)
|
||||
for tag_uuid in self.tags:
|
||||
tag = self._datastore.get_tag(tag_uuid)
|
||||
if tag.overrides_watch and tag.fetch_backend:
|
||||
return tag.fetch_backend
|
||||
|
||||
# Global default
|
||||
return self._datastore.settings.fetch_backend
|
||||
"""
|
||||
# Maybe also if is_image etc?
|
||||
# This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
|
||||
if self.is_pdf:
|
||||
return 'html_requests'
|
||||
|
||||
return self.get('fetch_backend')
|
||||
|
||||
@property
|
||||
def is_pdf(self):
|
||||
@@ -779,50 +774,24 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
# Also in the case that the file didnt exist
|
||||
return True
|
||||
|
||||
def bump_favicon(self, url, favicon_base_64: str, mime_type: str = None) -> None:
|
||||
def bump_favicon(self, url, favicon_base_64: str) -> None:
|
||||
from urllib.parse import urlparse
|
||||
import base64
|
||||
import binascii
|
||||
import re
|
||||
decoded = None
|
||||
|
||||
MAX_FAVICON_BYTES = 1 * 1024 * 1024 # 1 MB
|
||||
|
||||
MIME_TO_EXT = {
|
||||
'image/png': 'png',
|
||||
'image/x-icon': 'ico',
|
||||
'image/vnd.microsoft.icon': 'ico',
|
||||
'image/jpeg': 'jpg',
|
||||
'image/gif': 'gif',
|
||||
'image/svg+xml': 'svg',
|
||||
'image/webp': 'webp',
|
||||
'image/bmp': 'bmp',
|
||||
}
|
||||
|
||||
extension = None
|
||||
|
||||
# If the caller already resolved the MIME type (e.g. from blob.type or a data URI),
|
||||
# use that directly — it's more reliable than guessing from a URL path.
|
||||
if mime_type:
|
||||
extension = MIME_TO_EXT.get(mime_type.lower().split(';')[0].strip(), None)
|
||||
|
||||
# Fall back to extracting extension from URL path, unless it's a data URI.
|
||||
if not extension and url and not url.startswith('data:'):
|
||||
if url:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
filename = os.path.basename(parsed.path)
|
||||
(_base, ext) = filename.lower().strip().rsplit('.', 1)
|
||||
extension = ext
|
||||
(base, extension) = filename.lower().strip().rsplit('.', 1)
|
||||
except ValueError:
|
||||
logger.warning(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}', defaulting to ico")
|
||||
|
||||
# Handle data URIs: extract MIME type from the URI itself when not already known
|
||||
if not extension and url and url.startswith('data:'):
|
||||
m = re.match(r'^data:([^;]+);base64,', url)
|
||||
if m:
|
||||
extension = MIME_TO_EXT.get(m.group(1).lower(), None)
|
||||
|
||||
if not extension:
|
||||
extension = 'ico'
|
||||
logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
|
||||
return None
|
||||
else:
|
||||
# Assume favicon.ico
|
||||
base = "favicon"
|
||||
extension = "ico"
|
||||
|
||||
fname = os.path.join(self.data_dir, f"favicon.{extension}")
|
||||
|
||||
@@ -831,50 +800,58 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
decoded = base64.b64decode(favicon_base_64, validate=True)
|
||||
except (binascii.Error, ValueError) as e:
|
||||
logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
|
||||
return None
|
||||
else:
|
||||
if decoded:
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
if len(decoded) > MAX_FAVICON_BYTES:
|
||||
logger.warning(f"UUID: {self.get('uuid')} Favicon too large ({len(decoded)} bytes), skipping")
|
||||
return None
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=self.get('uuid'))
|
||||
|
||||
# Invalidate module-level favicon filename cache for this watch
|
||||
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=self.get('uuid'))
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
|
||||
|
||||
# @todo - Store some checksum and only write when its different
|
||||
logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")
|
||||
|
||||
def get_favicon_filename(self) -> str | None:
|
||||
"""
|
||||
Find any favicon.* file in the watch data directory.
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||
deepcopy (which drops instance attrs), and concurrent request races.
|
||||
Invalidated by bump_favicon() when a new favicon is saved.
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
|
||||
Returns:
|
||||
str: Basename of the favicon file, or None if not found.
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
"""
|
||||
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||
fname = os.path.basename(files[0]) if files else None
|
||||
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||
return fname
|
||||
|
||||
if not files:
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -1205,13 +1182,18 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def compile_error_texts(self, has_proxies=None):
|
||||
"""Compile error texts for this watch.
|
||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||
from flask import url_for, has_request_context
|
||||
from flask import url_for
|
||||
from markupsafe import Markup
|
||||
|
||||
output = [] # Initialize as list since we're using append
|
||||
last_error = self.get('last_error','')
|
||||
|
||||
has_app_context = has_request_context()
|
||||
try:
|
||||
url_for('settings.settings_page')
|
||||
except Exception as e:
|
||||
has_app_context = False
|
||||
else:
|
||||
has_app_context = True
|
||||
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
|
||||
@@ -186,9 +186,7 @@ class watch_base(dict):
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'content-type': None,
|
||||
'date_created': None,
|
||||
'extract_lines_containing': [], # Keep only lines containing these substrings (plain text, case-insensitive)
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'browser_profile': 'system', # machine-name key of a BrowserProfile; 'system' → resolve via chain
|
||||
'fetch_backend': 'system', # plaintext, playwright etc
|
||||
'fetch_time': 0.0,
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
@@ -339,7 +337,6 @@ class watch_base(dict):
|
||||
# These are set by processors/workers and should not trigger edited flag
|
||||
additional_system_fields = {
|
||||
'last_check_status', # Set by processors
|
||||
'last_filter_config_hash', # Set by text_json_diff processor, internal skip-cache
|
||||
'restock', # Set by restock processor
|
||||
'last_viewed', # Set by mark_all_viewed endpoint
|
||||
}
|
||||
@@ -592,9 +589,7 @@ class watch_base(dict):
|
||||
return None
|
||||
|
||||
try:
|
||||
# _datastore is a ChangeDetectionStore (has .data) or a plain dict (unit tests)
|
||||
store_data = self._datastore.data if hasattr(self._datastore, 'data') else self._datastore
|
||||
value = store_data['settings']
|
||||
value = self._datastore['settings']
|
||||
for key in path:
|
||||
value = value[key]
|
||||
return value
|
||||
|
||||
@@ -1,380 +0,0 @@
|
||||
"""
|
||||
BrowserProfile — named, reusable browser/fetcher configuration.
|
||||
|
||||
Storage key
|
||||
-----------
|
||||
Profiles are stored in ``settings.application.browser_profiles`` as a plain dict
|
||||
keyed by *machine name* — a lowercase, underscore-separated slug derived from the
|
||||
human-readable ``name`` field:
|
||||
|
||||
'My Blocking Chrome' → 'my_blocking_chrome'
|
||||
'Custom CDP — Mobile (375px)' → 'custom_cdp_mobile_375px'
|
||||
|
||||
Using the machine name as the key means that deleting a profile and recreating
|
||||
it with the same name restores the original key, so all watches that referenced
|
||||
it continue to work without any manual re-linking.
|
||||
|
||||
Resolution chain
|
||||
----------------
|
||||
``resolve_browser_profile(watch, datastore)`` walks:
|
||||
|
||||
watch.browser_profile → first tag with overrides_watch=True →
|
||||
settings.application.browser_profile → built-in fallback
|
||||
|
||||
It never raises. Stale / missing machine-name references are logged and the
|
||||
resolver falls through to the next level.
|
||||
|
||||
Built-in profiles
|
||||
-----------------
|
||||
``BUILTIN_REQUESTS`` and ``BUILTIN_BROWSER`` are always available and cannot be
|
||||
deleted from the UI (``is_builtin=True``). Their machine names are stored in
|
||||
``RESERVED_MACHINE_NAMES`` to block user profiles from shadowing them.
|
||||
|
||||
Migration
|
||||
---------
|
||||
``store/updates.py::update_31`` converts the legacy ``fetch_backend`` field on
|
||||
watches, tags and global settings into ``browser_profile`` machine-name
|
||||
references. After that migration no legacy paths are needed here.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
# Default User-Agent for the built-in plaintext requests profile.
|
||||
# Overridable via environment variable for deployments that need a custom UA.
|
||||
_DEFAULT_REQUESTS_UA = os.getenv(
|
||||
"DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
NAME_MAX_LEN = 100
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class BrowserProfile(BaseModel):
|
||||
"""
|
||||
A named, reusable configuration for how a watch fetches its target URL.
|
||||
|
||||
The *machine name* (see ``get_machine_name()``) is the stable storage key.
|
||||
Updating ``name`` changes the machine name; any watch that referenced the
|
||||
old machine name will then fall back through the resolution chain until it
|
||||
is explicitly re-pointed. To replace a profile without breaking watches,
|
||||
delete it and recreate it with the *same* name.
|
||||
"""
|
||||
|
||||
name: str
|
||||
"""Human-readable label shown in the UI. Max 100 characters."""
|
||||
|
||||
fetch_backend: str = 'requests'
|
||||
"""
|
||||
Which fetch engine to use. This is the *clean* fetcher name without the
|
||||
``html_`` module prefix (e.g. ``'requests'``, ``'webdriver'``,
|
||||
``'playwright'``, ``'puppeteer'``, ``'cloakbrowser'``).
|
||||
|
||||
The module-level ``html_`` prefix (``html_requests``, ``html_webdriver``,
|
||||
…) is an implementation detail of ``content_fetchers/``. Use
|
||||
``get_fetcher_class_name()`` to obtain the full module attribute name when
|
||||
you need to look up the class.
|
||||
|
||||
Must be non-empty and contain only ``[a-z0-9_]`` characters.
|
||||
"""
|
||||
|
||||
is_builtin: bool = False
|
||||
"""Built-in profiles are always present and cannot be deleted from the UI."""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Browser-specific settings (silently ignored by html_requests)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
browser_connection_url: Optional[str] = None
|
||||
"""
|
||||
Custom CDP / WebSocket endpoint, e.g. ``ws://my-chrome:3000``.
|
||||
Overrides the system-wide ``PLAYWRIGHT_DRIVER_URL`` for this profile.
|
||||
Only meaningful for ``html_webdriver`` profiles.
|
||||
"""
|
||||
|
||||
viewport_width: int = 1280
|
||||
"""
|
||||
Browser viewport width in pixels.
|
||||
Common presets: 375 (iPhone), 768 (tablet), 1280 (desktop).
|
||||
"""
|
||||
|
||||
viewport_height: int = 1000
|
||||
"""
|
||||
Browser viewport height in pixels.
|
||||
Common presets: 812 (iPhone), 1024 (tablet), 1000 (desktop).
|
||||
"""
|
||||
|
||||
block_images: bool = False
|
||||
"""
|
||||
Block all image requests. Typically cuts page-load time by 40-70 % on
|
||||
image-heavy sites with no impact on text-based change detection.
|
||||
"""
|
||||
|
||||
block_fonts: bool = False
|
||||
"""Block web-font requests. Modest speed gain; rarely affects detection."""
|
||||
|
||||
user_agent: Optional[str] = None
|
||||
"""
|
||||
Override the browser User-Agent string.
|
||||
``None`` keeps the fetcher's built-in default, which already strips
|
||||
obvious headless markers such as ``HeadlessChrome``.
|
||||
"""
|
||||
|
||||
ignore_https_errors: bool = False
|
||||
"""
|
||||
Proceed even when the server's TLS certificate is invalid or self-signed.
|
||||
Useful for staging / development environments.
|
||||
"""
|
||||
|
||||
locale: Optional[str] = None
|
||||
"""
|
||||
Browser locale (e.g. ``en-US``, ``de-DE``).
|
||||
Sets the ``Accept-Language`` header and ``navigator.language``.
|
||||
Some sites serve different prices or copy based on locale.
|
||||
"""
|
||||
|
||||
custom_headers: str = ''
|
||||
"""
|
||||
Extra HTTP headers sent with every request using this profile, in ``Key: Value`` format
|
||||
(one per line, ``#`` lines are ignored). Applied before per-watch headers so
|
||||
individual watches can override them.
|
||||
"""
|
||||
|
||||
service_workers: str = 'allow'
|
||||
"""
|
||||
Whether to allow Service Workers in the browser context.
|
||||
Playwright accepts ``'allow'`` or ``'block'``.
|
||||
Block to avoid large Service Worker data transfers (e.g. YouTube).
|
||||
"""
|
||||
|
||||
extra_delay: int = 0
|
||||
"""
|
||||
Extra seconds to wait after page load before extracting content
|
||||
(on top of the per-watch ``render_extract_delay``).
|
||||
Sourced from ``WEBDRIVER_DELAY_BEFORE_CONTENT_READY`` at startup.
|
||||
"""
|
||||
|
||||
model_config = {"frozen": False}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Validators
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@field_validator('fetch_backend')
|
||||
@classmethod
|
||||
def _validate_fetch_backend(cls, v: str) -> str:
|
||||
v = v.strip()
|
||||
if not v:
|
||||
raise ValueError('fetch_backend cannot be empty')
|
||||
if not re.fullmatch(r'[a-z0-9_]+', v):
|
||||
raise ValueError(
|
||||
f"fetch_backend must contain only lowercase letters, digits and underscores, got {v!r}"
|
||||
)
|
||||
if v.startswith('html_'):
|
||||
raise ValueError(
|
||||
f"fetch_backend should be the clean fetcher name without the 'html_' prefix "
|
||||
f"(e.g. 'requests', 'webdriver', 'playwright'). Got {v!r}. "
|
||||
f"Use get_fetcher_class_name() to obtain the full module attribute name."
|
||||
)
|
||||
return v
|
||||
|
||||
@field_validator('name')
|
||||
@classmethod
|
||||
def _validate_name(cls, v: str) -> str:
|
||||
v = v.strip()
|
||||
if not v:
|
||||
raise ValueError('Name cannot be empty')
|
||||
if len(v) > NAME_MAX_LEN:
|
||||
raise ValueError(f'Name must be {NAME_MAX_LEN} characters or less')
|
||||
return v
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Machine-name helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def machine_name_from_str(name: str) -> str:
|
||||
"""
|
||||
Convert a human name to a machine-safe storage key.
|
||||
|
||||
Transformation rules (applied in order):
|
||||
|
||||
1. Strip surrounding whitespace; lower-case.
|
||||
2. Replace runs of whitespace or hyphens with a single ``_``.
|
||||
3. Drop every character that is not ``[a-z0-9_]``.
|
||||
4. Collapse consecutive underscores.
|
||||
5. Strip leading / trailing underscores.
|
||||
6. Truncate to ``NAME_MAX_LEN`` characters.
|
||||
|
||||
Examples::
|
||||
|
||||
'My Blocking Browser Chrome' → 'my_blocking_browser_chrome'
|
||||
'Custom CDP — Mobile (375px)' → 'custom_cdp_mobile_375px'
|
||||
' Weird --- Name ' → 'weird_name'
|
||||
"""
|
||||
s = name.strip().lower()
|
||||
s = re.sub(r'[\s\-]+', '_', s) # whitespace / hyphens → underscore
|
||||
s = re.sub(r'[^a-z0-9_]', '', s) # drop everything else
|
||||
s = re.sub(r'_+', '_', s) # collapse repeated underscores
|
||||
s = s.strip('_') # drop leading / trailing underscores
|
||||
return s[:NAME_MAX_LEN]
|
||||
|
||||
def get_machine_name(self) -> str:
|
||||
"""Return the machine-safe storage key derived from this profile's ``name``."""
|
||||
return self.machine_name_from_str(self.name)
|
||||
|
||||
def get_fetcher_class_name(self) -> str:
|
||||
"""Return the clean fetcher name for this profile (same as ``fetch_backend``).
|
||||
|
||||
Use with ``content_fetchers.get_fetcher()``::
|
||||
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_cls = content_fetchers.get_fetcher(profile.get_fetcher_class_name())
|
||||
"""
|
||||
return self.fetch_backend
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Built-in profiles (always present, cannot be deleted)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
BUILTIN_REQUESTS = BrowserProfile(
|
||||
name='Direct HTTP (requests)',
|
||||
fetch_backend='requests',
|
||||
is_builtin=True,
|
||||
user_agent=_DEFAULT_REQUESTS_UA,
|
||||
)
|
||||
|
||||
BUILTIN_PLAYWRIGHT = BrowserProfile(
|
||||
name='Browser (Chrome/Playwright)',
|
||||
fetch_backend='playwright_cdp',
|
||||
is_builtin=True,
|
||||
)
|
||||
|
||||
BUILTIN_SELENIUM = BrowserProfile(
|
||||
name='Browser (Chrome/Selenium)',
|
||||
fetch_backend='selenium',
|
||||
is_builtin=True,
|
||||
)
|
||||
|
||||
BUILTIN_PUPPETEER = BrowserProfile(
|
||||
name='Browser (Chrome/Puppeteer)',
|
||||
fetch_backend='puppeteer',
|
||||
is_builtin=True,
|
||||
)
|
||||
|
||||
# Backwards-compatible alias — code that imported BUILTIN_BROWSER keeps working.
|
||||
BUILTIN_BROWSER = BUILTIN_PLAYWRIGHT
|
||||
|
||||
# Keyed by machine name for O(1) lookup.
|
||||
_BUILTINS: dict[str, BrowserProfile] = {
|
||||
b.get_machine_name(): b
|
||||
for b in (BUILTIN_REQUESTS, BUILTIN_PLAYWRIGHT, BUILTIN_SELENIUM, BUILTIN_PUPPETEER)
|
||||
}
|
||||
|
||||
# Machine names that cannot be used by user-created profiles.
|
||||
RESERVED_MACHINE_NAMES: frozenset[str] = frozenset(_BUILTINS.keys())
|
||||
|
||||
|
||||
def get_default_browser_builtin() -> BrowserProfile:
|
||||
"""Final fallback when no profile can be resolved through the chain.
|
||||
|
||||
``preconfigure_browser_profiles_based_on_env()`` sets
|
||||
``settings.application.browser_profile`` explicitly at startup, so this
|
||||
fallback is only reached for watches with stale / missing machine-name
|
||||
references. Safe default is always direct HTTP requests.
|
||||
"""
|
||||
return BUILTIN_REQUESTS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lookup helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_builtin_profiles() -> dict[str, BrowserProfile]:
|
||||
"""Return a shallow copy of the built-in profiles dict (keyed by machine name)."""
|
||||
return dict(_BUILTINS)
|
||||
|
||||
|
||||
def get_profile(machine_name: str, store_profiles: dict) -> Optional[BrowserProfile]:
|
||||
"""
|
||||
Look up a ``BrowserProfile`` by machine name.
|
||||
|
||||
Stored profiles are checked first so that env-configured built-ins (written
|
||||
by ``preconfigure_browser_profiles_based_on_env``) take priority over the
|
||||
bare module-level defaults. Falls back to ``_BUILTINS`` when no stored
|
||||
version exists.
|
||||
|
||||
Returns ``None`` when the machine name is unknown or the stored data is
|
||||
corrupt (a warning is logged in the latter case).
|
||||
"""
|
||||
raw = store_profiles.get(machine_name)
|
||||
if raw is not None:
|
||||
if isinstance(raw, BrowserProfile):
|
||||
return raw
|
||||
try:
|
||||
return BrowserProfile(**raw)
|
||||
except Exception as exc:
|
||||
logger.warning(f"BrowserProfile '{machine_name}': failed to deserialize — {exc}")
|
||||
# Fall through to built-in
|
||||
|
||||
if machine_name in _BUILTINS:
|
||||
return _BUILTINS[machine_name]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def resolve_browser_profile(watch, datastore) -> BrowserProfile:
|
||||
"""
|
||||
Resolve the effective ``BrowserProfile`` for *watch*.
|
||||
|
||||
Resolution chain
|
||||
~~~~~~~~~~~~~~~~
|
||||
1. ``watch['browser_profile']`` — explicit machine name set on the watch.
|
||||
2. First tag with ``overrides_watch=True`` that has ``browser_profile`` set.
|
||||
3. ``settings.application['browser_profile']`` — system-wide default.
|
||||
4. Built-in fallback: ``BUILTIN_REQUESTS`` (requests is always the safe default).
|
||||
|
||||
Never raises. A stale / missing machine-name reference produces a
|
||||
``logger.warning`` and the resolver continues down the chain.
|
||||
"""
|
||||
from changedetectionio.model.resolver import resolve_setting
|
||||
|
||||
store_profiles: dict = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
|
||||
machine_name = resolve_setting(
|
||||
watch, datastore,
|
||||
field_name='browser_profile',
|
||||
sentinel_values={'system', 'default', ''},
|
||||
default=None,
|
||||
require_tag_override=True,
|
||||
)
|
||||
|
||||
if machine_name:
|
||||
profile = get_profile(machine_name, store_profiles)
|
||||
if profile:
|
||||
return profile
|
||||
logger.warning(
|
||||
f"Watch {watch.get('uuid')!r}: browser_profile {machine_name!r} not found, "
|
||||
f"falling back through the chain"
|
||||
)
|
||||
|
||||
return get_default_browser_builtin()
|
||||
@@ -1,63 +0,0 @@
|
||||
"""
|
||||
Unified Watch → Tag → Global settings cascade resolver.
|
||||
|
||||
All settings resolution follows the same priority order:
|
||||
1. Watch-level setting (if set and not a sentinel "use parent" value)
|
||||
2. First tag with overrides_watch=True that has the field set
|
||||
3. Global application settings
|
||||
4. Caller-supplied default
|
||||
|
||||
This replaces the previously scattered manual resolution loops found in
|
||||
notification_service.py, processors/base.py, and the restock processor.
|
||||
"""
|
||||
|
||||
|
||||
def resolve_setting(watch, datastore, field_name, *,
|
||||
sentinel_values=None,
|
||||
default=None,
|
||||
require_tag_override=True):
|
||||
"""
|
||||
Resolve a single setting value by walking the Watch → Tag → Global chain.
|
||||
|
||||
Args:
|
||||
watch: Watch dict / model object.
|
||||
datastore: App datastore (must have get_all_tags_for_watch() and
|
||||
data['settings']['application']).
|
||||
field_name: The setting key to look up at each level.
|
||||
sentinel_values: Set of values that mean "not configured here, keep looking".
|
||||
For example {'system'} for fetch_backend.
|
||||
default: Value returned when nothing is found in the chain.
|
||||
require_tag_override: If True (default), only tags where overrides_watch=True
|
||||
contribute to the cascade. Set to False when every tag
|
||||
that carries the field should be considered (e.g. for
|
||||
fields that make sense to merge/override at any tag level).
|
||||
|
||||
Returns:
|
||||
The first non-sentinel, non-empty value found, or *default*.
|
||||
"""
|
||||
_sentinels = set(sentinel_values) if sentinel_values else set()
|
||||
|
||||
def _is_unset(v):
|
||||
return v is None or v == '' or v in _sentinels
|
||||
|
||||
# 1. Watch level
|
||||
v = watch.get(field_name)
|
||||
if not _is_unset(v):
|
||||
return v
|
||||
|
||||
# 2. Tag level
|
||||
tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
||||
if tags:
|
||||
for tag in tags.values():
|
||||
if require_tag_override and not tag.get('overrides_watch'):
|
||||
continue
|
||||
v = tag.get(field_name)
|
||||
if not _is_unset(v):
|
||||
return v
|
||||
|
||||
# 3. Global application settings
|
||||
v = datastore.data['settings']['application'].get(field_name)
|
||||
if not _is_unset(v):
|
||||
return v
|
||||
|
||||
return default
|
||||
@@ -259,12 +259,9 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
|
||||
or url.startswith('https://discord.com/api'))\
|
||||
and 'html' in requested_output_format:
|
||||
# Discord doesn't render HTML — convert markup to plain text equivalents.
|
||||
# is injected upstream to preserve double-spaces for HTML email clients;
|
||||
# Discord displays it as the literal string " " so strip it here.
|
||||
# Discord doesn't support HTML, replace <br> with newlines
|
||||
n_body = n_body.strip().replace('<br>', '\n')
|
||||
n_body = n_body.replace('</br>', '\n')
|
||||
n_body = n_body.replace(' ', ' ')
|
||||
n_body = newline_re.sub('\n', n_body)
|
||||
|
||||
# Don't replace placeholders or truncate here - let the custom Discord plugin handle it
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
from .registry import registry, NotificationProfileType, AppriseProfileType
|
||||
|
||||
__all__ = ['registry', 'NotificationProfileType', 'AppriseProfileType']
|
||||
@@ -1,73 +0,0 @@
|
||||
"""
|
||||
Per-profile notification log.
|
||||
|
||||
Each profile gets its own log file at:
|
||||
{datastore_path}/notification-logs/{profile_uuid}.log
|
||||
|
||||
Entries are stored as JSON-lines (one JSON object per line).
|
||||
The file is capped at MAX_ENTRIES lines (oldest pruned first).
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
MAX_ENTRIES = 100
|
||||
_LOG_DIR = 'notification-logs'
|
||||
|
||||
|
||||
def _log_file(datastore_path: str, profile_uuid: str) -> str:
|
||||
return os.path.join(datastore_path, _LOG_DIR, f'{profile_uuid}.log')
|
||||
|
||||
|
||||
def write_profile_log(datastore_path: str, profile_uuid: str, *,
|
||||
watch_url: str = '',
|
||||
watch_uuid: str = '',
|
||||
status: str, # 'ok' | 'error' | 'test'
|
||||
message: str = ''):
|
||||
"""Append one log entry; prune to MAX_ENTRIES."""
|
||||
log_dir = os.path.join(datastore_path, _LOG_DIR)
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
|
||||
entry = json.dumps({
|
||||
'ts': datetime.now(tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
|
||||
'watch_url': watch_url[:200],
|
||||
'watch_uuid': watch_uuid,
|
||||
'status': status,
|
||||
'message': message[:500],
|
||||
}, ensure_ascii=False)
|
||||
|
||||
path = _log_file(datastore_path, profile_uuid)
|
||||
try:
|
||||
with open(path, 'r', encoding='utf-8') as fh:
|
||||
lines = [l for l in fh.read().splitlines() if l.strip()]
|
||||
except FileNotFoundError:
|
||||
lines = []
|
||||
|
||||
lines.append(entry)
|
||||
lines = lines[-MAX_ENTRIES:]
|
||||
|
||||
with open(path, 'w', encoding='utf-8') as fh:
|
||||
fh.write('\n'.join(lines) + '\n')
|
||||
|
||||
|
||||
def read_profile_log(datastore_path: str, profile_uuid: str) -> list:
|
||||
"""Return log entries as a list of dicts, newest first."""
|
||||
path = _log_file(datastore_path, profile_uuid)
|
||||
try:
|
||||
with open(path, 'r', encoding='utf-8') as fh:
|
||||
lines = [l.strip() for l in fh if l.strip()]
|
||||
except FileNotFoundError:
|
||||
return []
|
||||
|
||||
entries = []
|
||||
for line in reversed(lines):
|
||||
try:
|
||||
entries.append(json.loads(line))
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
return entries
|
||||
|
||||
|
||||
def has_log(datastore_path: str, profile_uuid: str) -> bool:
|
||||
return os.path.exists(_log_file(datastore_path, profile_uuid))
|
||||
@@ -1,111 +0,0 @@
|
||||
"""
|
||||
Notification Profile Type plugin registry.
|
||||
|
||||
NotificationProfileType is the abstract base — the only contract is send().
|
||||
Plugins are free to use any delivery mechanism (Apprise, direct HTTP, SDK, etc.).
|
||||
|
||||
Built-in: AppriseProfileType (raw Apprise URL list).
|
||||
|
||||
Third-party plugins register additional types:
|
||||
|
||||
from changedetectionio.notification_profiles.registry import registry, NotificationProfileType
|
||||
|
||||
@registry.register
|
||||
class MyProfileType(NotificationProfileType):
|
||||
type_id = "mytype"
|
||||
display_name = "My Service"
|
||||
icon = "bell"
|
||||
template = "my_plugin/notification_profiles/types/mytype.html"
|
||||
|
||||
def send(self, config: dict, n_object: dict, datastore) -> bool:
|
||||
requests.post(config['webhook_url'], json={"text": n_object['notification_body']})
|
||||
return True
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class NotificationProfileType(ABC):
|
||||
type_id: str = NotImplemented
|
||||
display_name: str = NotImplemented
|
||||
icon: str = "bell" # feather icon name
|
||||
template: str = NotImplemented # Jinja2 partial rendered in the profile edit form
|
||||
|
||||
@abstractmethod
|
||||
def send(self, config: dict, n_object: dict, datastore) -> bool:
|
||||
"""
|
||||
Deliver the notification.
|
||||
|
||||
Args:
|
||||
config: The profile's config dict (type-specific fields).
|
||||
n_object: Fully-rendered NotificationContextData (title, body, format, etc.).
|
||||
datastore: App datastore for any extra lookups.
|
||||
|
||||
Returns True on success, False on failure (do not raise — log instead).
|
||||
"""
|
||||
|
||||
def validate(self, config: dict) -> None:
|
||||
"""Raise ValueError with a user-readable message on invalid config."""
|
||||
pass
|
||||
|
||||
def get_url_hint(self, config: dict) -> str:
|
||||
"""Short display string shown in the selector chip tooltip / dropdown row."""
|
||||
return ''
|
||||
|
||||
|
||||
class AppriseProfileType(NotificationProfileType):
|
||||
"""Delivers notifications via Apprise using a raw URL list."""
|
||||
|
||||
type_id = "apprise"
|
||||
display_name = "Apprise"
|
||||
icon = "bell"
|
||||
template = "notification_profiles/types/apprise.html"
|
||||
|
||||
def get_apprise_urls(self, config: dict) -> list:
|
||||
return config.get('notification_urls') or []
|
||||
|
||||
def send(self, config: dict, n_object, datastore) -> bool:
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
urls = self.get_apprise_urls(config)
|
||||
if not urls:
|
||||
return False
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
n_object = NotificationContextData(n_object)
|
||||
n_object['notification_urls'] = urls
|
||||
n_object['notification_title'] = config.get('notification_title') or n_object.get('notification_title')
|
||||
n_object['notification_body'] = config.get('notification_body') or n_object.get('notification_body')
|
||||
n_object['notification_format'] = config.get('notification_format') or n_object.get('notification_format')
|
||||
process_notification(n_object, datastore)
|
||||
return True
|
||||
|
||||
def get_url_hint(self, config: dict) -> str:
|
||||
urls = config.get('notification_urls') or []
|
||||
if urls:
|
||||
u = urls[0]
|
||||
return (u[:60] + '…') if len(u) > 60 else u
|
||||
return ''
|
||||
|
||||
|
||||
class _Registry:
|
||||
def __init__(self):
|
||||
self._types: dict = {}
|
||||
|
||||
def register(self, cls):
|
||||
"""Register a NotificationProfileType subclass. Usable as a decorator."""
|
||||
instance = cls()
|
||||
self._types[instance.type_id] = instance
|
||||
return cls
|
||||
|
||||
def get(self, type_id: str) -> NotificationProfileType:
|
||||
return self._types.get(type_id, self._types.get('apprise'))
|
||||
|
||||
def all(self) -> list:
|
||||
return list(self._types.values())
|
||||
|
||||
def choices(self) -> list:
|
||||
return [(t.type_id, t.display_name) for t in self._types.values()]
|
||||
|
||||
|
||||
registry = _Registry()
|
||||
registry.register(AppriseProfileType)
|
||||
@@ -1,49 +0,0 @@
|
||||
"""
|
||||
Resolve the full set of NotificationProfile objects that should fire for a given watch.
|
||||
|
||||
Merges profile UUIDs from: Watch → Tags → System (union, deduplicated).
|
||||
Mute cascade is checked separately via resolve_setting() before calling this.
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def resolve_notification_profiles(watch, datastore) -> list:
|
||||
"""
|
||||
Return list of (profile_dict, NotificationProfileType) tuples to fire for *watch*.
|
||||
|
||||
Profiles are deduplicated by UUID — if the same UUID appears at multiple levels
|
||||
it fires once, not multiple times.
|
||||
"""
|
||||
from changedetectionio.notification_profiles.registry import registry
|
||||
|
||||
all_profiles = datastore.data['settings']['application'].get('notification_profile_data', {})
|
||||
|
||||
seen = set()
|
||||
result = []
|
||||
|
||||
def _add(uuids):
|
||||
for uid in (uuids or []):
|
||||
if uid in seen:
|
||||
continue
|
||||
profile = all_profiles.get(uid)
|
||||
if not profile:
|
||||
logger.warning(f"Notification profile UUID {uid!r} not found, skipping")
|
||||
continue
|
||||
seen.add(uid)
|
||||
type_handler = registry.get(profile.get('type', 'apprise'))
|
||||
result.append((profile, type_handler))
|
||||
|
||||
# 1. Watch-level
|
||||
_add(watch.get('notification_profiles', []))
|
||||
|
||||
# 2. Tag/group level
|
||||
tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
||||
if tags:
|
||||
for tag in tags.values():
|
||||
_add(tag.get('notification_profiles', []))
|
||||
|
||||
# 3. System level
|
||||
_add(datastore.data['settings']['application'].get('notification_profiles', []))
|
||||
|
||||
return result
|
||||
@@ -54,152 +54,34 @@ def _check_cascading_vars(datastore, var_name, watch):
|
||||
return None
|
||||
|
||||
|
||||
class FormattableTimestamp(str):
|
||||
"""
|
||||
A str subclass representing a formatted datetime. As a plain string it renders
|
||||
with the default format, but can also be called with a custom format argument
|
||||
in Jinja2 templates:
|
||||
|
||||
{{ change_datetime }} → '2024-01-15 10:30:00 UTC'
|
||||
{{ change_datetime(format='%Y') }} → '2024'
|
||||
{{ change_datetime(format='%A') }} → 'Monday'
|
||||
{{ change_datetime(format='%Y-%m-%d') }} → '2024-01-15'
|
||||
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
_DEFAULT_FORMAT = '%Y-%m-%d %H:%M:%S %Z'
|
||||
|
||||
def __new__(cls, timestamp):
|
||||
dt = datetime.datetime.fromtimestamp(int(timestamp), tz=pytz.UTC)
|
||||
local_tz = datetime.datetime.now().astimezone().tzinfo
|
||||
dt_local = dt.astimezone(local_tz)
|
||||
try:
|
||||
formatted = dt_local.strftime(cls._DEFAULT_FORMAT)
|
||||
except Exception:
|
||||
formatted = dt_local.isoformat()
|
||||
instance = super().__new__(cls, formatted)
|
||||
instance._dt = dt_local
|
||||
return instance
|
||||
|
||||
def __call__(self, format=_DEFAULT_FORMAT):
|
||||
try:
|
||||
return self._dt.strftime(format)
|
||||
except Exception:
|
||||
return self._dt.isoformat()
|
||||
|
||||
|
||||
class FormattableExtract(str):
|
||||
"""
|
||||
A str subclass that holds only the extracted changed fragments from a diff.
|
||||
Used for {{diff_changed_from}} and {{diff_changed_to}} tokens.
|
||||
|
||||
{{ diff_changed_from }} → old value(s) only, e.g. "$99.99"
|
||||
{{ diff_changed_to }} → new value(s) only, e.g. "$109.99"
|
||||
|
||||
Multiple changed fragments are joined with newlines.
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
|
||||
if prev_snapshot or current_snapshot:
|
||||
from changedetectionio import diff as diff_module
|
||||
raw = diff_module.render_diff(prev_snapshot, current_snapshot, word_diff=True)
|
||||
extracted = extract_fn(raw)
|
||||
else:
|
||||
extracted = ''
|
||||
instance = super().__new__(cls, extracted)
|
||||
return instance
|
||||
|
||||
|
||||
class FormattableDiff(str):
|
||||
"""
|
||||
A str subclass representing a rendered diff. As a plain string it renders
|
||||
with the default options for that variant, but can be called with custom
|
||||
arguments in Jinja2 templates:
|
||||
|
||||
{{ diff }} → default diff output
|
||||
{{ diff(lines=5) }} → truncate to 5 lines
|
||||
{{ diff(added_only=true) }} → only show added lines
|
||||
{{ diff(removed_only=true) }} → only show removed lines
|
||||
{{ diff(context=3) }} → 3 lines of context around changes
|
||||
{{ diff(word_diff=false) }} → line-level diff instead of word-level
|
||||
{{ diff(lines=10, added_only=true) }} → combine args
|
||||
{{ diff_added(lines=5) }} → works on any diff_* variant too
|
||||
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
|
||||
if prev_snapshot or current_snapshot:
|
||||
from changedetectionio import diff as diff_module
|
||||
rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
|
||||
else:
|
||||
rendered = ''
|
||||
instance = super().__new__(cls, rendered)
|
||||
instance._prev = prev_snapshot
|
||||
instance._current = current_snapshot
|
||||
instance._base_kwargs = base_kwargs
|
||||
return instance
|
||||
|
||||
def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
|
||||
word_diff=None, case_insensitive=False, ignore_junk=False):
|
||||
from changedetectionio import diff as diff_module
|
||||
kwargs = dict(self._base_kwargs)
|
||||
|
||||
if added_only:
|
||||
kwargs['include_removed'] = False
|
||||
if removed_only:
|
||||
kwargs['include_added'] = False
|
||||
if context:
|
||||
kwargs['context_lines'] = int(context)
|
||||
if word_diff is not None:
|
||||
kwargs['word_diff'] = bool(word_diff)
|
||||
if case_insensitive:
|
||||
kwargs['case_insensitive'] = True
|
||||
if ignore_junk:
|
||||
kwargs['ignore_junk'] = True
|
||||
|
||||
result = diff_module.render_diff(self._prev or '', self._current or '', **kwargs)
|
||||
|
||||
if lines is not None:
|
||||
result = '\n'.join(result.splitlines()[:int(lines)])
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||
class NotificationContextData(dict):
|
||||
def __init__(self, initial_data=None, **kwargs):
|
||||
# ValidateJinja2Template() validates against the keynames of this dict to check for valid tokens in the body (user submission)
|
||||
super().__init__({
|
||||
'base_url': None,
|
||||
'change_datetime': FormattableTimestamp(time.time()),
|
||||
'current_snapshot': None,
|
||||
'diff': FormattableDiff('', ''),
|
||||
'diff_clean': FormattableDiff('', '', include_change_type_prefix=False),
|
||||
'diff_added': FormattableDiff('', '', include_removed=False),
|
||||
'diff_added_clean': FormattableDiff('', '', include_removed=False, include_change_type_prefix=False),
|
||||
'diff_full': FormattableDiff('', '', include_equal=True),
|
||||
'diff_full_clean': FormattableDiff('', '', include_equal=True, include_change_type_prefix=False),
|
||||
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff': None,
|
||||
'diff_clean': None,
|
||||
'diff_added': None,
|
||||
'diff_added_clean': None,
|
||||
'diff_full': None,
|
||||
'diff_full_clean': None,
|
||||
'diff_patch': None,
|
||||
'diff_removed': None,
|
||||
'diff_removed_clean': None,
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
'prev_snapshot': None,
|
||||
'preview_url': None,
|
||||
'screenshot': None,
|
||||
'triggered_text': None,
|
||||
'timestamp_from': None,
|
||||
'timestamp_to': None,
|
||||
'triggered_text': None,
|
||||
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
'watch_mime_type': None,
|
||||
'watch_tag': None,
|
||||
'watch_title': None,
|
||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||
'watch_uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
})
|
||||
|
||||
# Apply any initial data passed in
|
||||
@@ -221,7 +103,7 @@ class NotificationContextData(dict):
|
||||
So we can test the output in the notification body
|
||||
"""
|
||||
for key in self.keys():
|
||||
if key in ['uuid', 'time', 'watch_uuid', 'change_datetime'] or key.startswith('diff'):
|
||||
if key in ['uuid', 'time', 'watch_uuid']:
|
||||
continue
|
||||
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
||||
self[key] = rand_str
|
||||
@@ -233,6 +115,24 @@ class NotificationContextData(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def timestamp_to_localtime(timestamp):
|
||||
# Format the date using locale-aware formatting with timezone
|
||||
dt = datetime.datetime.fromtimestamp(int(timestamp))
|
||||
dt = dt.replace(tzinfo=pytz.UTC)
|
||||
|
||||
# Get local timezone-aware datetime
|
||||
local_tz = datetime.datetime.now().astimezone().tzinfo
|
||||
local_dt = dt.astimezone(local_tz)
|
||||
|
||||
# Format date with timezone - using strftime for locale awareness
|
||||
try:
|
||||
formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
|
||||
except:
|
||||
# Fallback if locale issues
|
||||
formatted_date = local_dt.isoformat()
|
||||
|
||||
return formatted_date
|
||||
|
||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
|
||||
"""
|
||||
Efficiently renders only the diff placeholders that are actually used in the notification text.
|
||||
@@ -250,12 +150,13 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
Returns:
|
||||
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
||||
"""
|
||||
from changedetectionio import diff
|
||||
import re
|
||||
from functools import lru_cache
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Define base kwargs for each diff variant — these become the stored defaults
|
||||
# on the FormattableDiff object, so {{ diff(lines=5) }} overrides on top of them
|
||||
# Define specifications for each diff variant
|
||||
diff_specs = {
|
||||
'diff': {'word_diff': word_diff},
|
||||
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
|
||||
@@ -268,27 +169,23 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
from changedetectionio.diff import extract_changed_from, extract_changed_to
|
||||
extract_specs = {
|
||||
'diff_changed_from': extract_changed_from,
|
||||
'diff_changed_to': extract_changed_to,
|
||||
}
|
||||
# Memoize render_diff to avoid duplicate renders with same kwargs
|
||||
@lru_cache(maxsize=4)
|
||||
def cached_render(kwargs_tuple):
|
||||
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
|
||||
# Only check and render diff keys that exist in NotificationContextData
|
||||
for key in NotificationContextData().keys():
|
||||
if not key.startswith('diff'):
|
||||
continue
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
continue
|
||||
if key in diff_specs:
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
elif key in extract_specs:
|
||||
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
|
||||
rendered_count += 1
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
kwargs = diff_specs[key]
|
||||
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
|
||||
ret[key] = cached_render(tuple(sorted(kwargs.items())))
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
|
||||
@@ -301,7 +198,7 @@ def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggere
|
||||
'current_snapshot': current_snapshot,
|
||||
'prev_snapshot': prev_snapshot,
|
||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||
'change_datetime': FormattableTimestamp(timestamp_changed) if timestamp_changed else None,
|
||||
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
|
||||
'triggered_text': triggered_text,
|
||||
'uuid': watch.get('uuid') if watch else None,
|
||||
'watch_url': watch.get('url') if watch else None,
|
||||
@@ -496,7 +393,7 @@ Thanks - Your omniscient changedetection.io installation.
|
||||
n_object = NotificationContextData({
|
||||
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
|
||||
'notification_body': body,
|
||||
'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
|
||||
'notification_format': self._check_cascading_vars('notification_format', watch),
|
||||
})
|
||||
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
|
||||
|
||||
|
||||
@@ -174,64 +174,6 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def get_html_head_extras():
|
||||
"""Return HTML to inject into the <head> of every page via base.html.
|
||||
|
||||
Plugins can use this to add <script>, <style>, or <link> tags that should
|
||||
be present on all pages. Return a raw HTML string or None.
|
||||
|
||||
IMPORTANT: Always use Flask's url_for() for any src/href URLs so that
|
||||
sub-path deployments (nginx reverse proxy with USE_X_SETTINGS / X-Forwarded-Prefix)
|
||||
work correctly. This hook is called inside a request context so url_for() is
|
||||
always available.
|
||||
|
||||
For small amounts of CSS/JS, return them inline — no file-serving needed::
|
||||
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
@hookimpl
|
||||
def get_html_head_extras(self):
|
||||
return (
|
||||
'<style>.my-module-banner { color: red; }</style>\\n'
|
||||
'<script>console.log("my_module_content loaded");</script>'
|
||||
)
|
||||
|
||||
For larger assets, register your own lightweight Flask routes in the plugin
|
||||
module and point to them with url_for() so the sub-path prefix is handled
|
||||
automatically::
|
||||
|
||||
from flask import url_for, Response
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.flask_app import app as _app
|
||||
|
||||
MY_CSS = ".my-module-example { color: red; }"
|
||||
MY_JS = "console.log('my_module_content loaded');"
|
||||
|
||||
@_app.route('/my_module_content/css')
|
||||
def my_module_content_css():
|
||||
return Response(MY_CSS, mimetype='text/css',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
@_app.route('/my_module_content/js')
|
||||
def my_module_content_js():
|
||||
return Response(MY_JS, mimetype='application/javascript',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
@hookimpl
|
||||
def get_html_head_extras(self):
|
||||
css = url_for('my_module_content_css')
|
||||
js = url_for('my_module_content_js')
|
||||
return (
|
||||
f'<link rel="stylesheet" href="{css}">\\n'
|
||||
f'<script src="{js}" defer></script>'
|
||||
)
|
||||
|
||||
Returns:
|
||||
str or None: Raw HTML string to inject inside <head>, or None
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
@@ -295,23 +237,14 @@ def register_builtin_fetchers():
|
||||
This is called from content_fetchers/__init__.py after all fetchers are imported
|
||||
to avoid circular import issues.
|
||||
"""
|
||||
from changedetectionio.content_fetchers import requests, puppeteer, webdriver_selenium
|
||||
from changedetectionio.content_fetchers.playwright import CDP, chrome, firefox, webkit
|
||||
from changedetectionio.content_fetchers import requests, playwright, puppeteer, webdriver_selenium
|
||||
|
||||
# Register each built-in fetcher plugin
|
||||
if hasattr(requests, 'requests_plugin'):
|
||||
plugin_manager.register(requests.requests_plugin, 'builtin_requests')
|
||||
|
||||
if hasattr(CDP, 'cdp_plugin'):
|
||||
plugin_manager.register(CDP.cdp_plugin, 'builtin_playwright_cdp')
|
||||
|
||||
if hasattr(chrome, 'chrome_plugin'):
|
||||
plugin_manager.register(chrome.chrome_plugin, 'builtin_playwright_chrome')
|
||||
|
||||
if hasattr(firefox, 'firefox_plugin'):
|
||||
plugin_manager.register(firefox.firefox_plugin, 'builtin_playwright_firefox')
|
||||
|
||||
if hasattr(webkit, 'webkit_plugin'):
|
||||
plugin_manager.register(webkit.webkit_plugin, 'builtin_playwright_webkit')
|
||||
if hasattr(playwright, 'playwright_plugin'):
|
||||
plugin_manager.register(playwright.playwright_plugin, 'builtin_playwright')
|
||||
|
||||
if hasattr(puppeteer, 'puppeteer_plugin'):
|
||||
plugin_manager.register(puppeteer.puppeteer_plugin, 'builtin_puppeteer')
|
||||
@@ -427,28 +360,57 @@ def get_active_plugins():
|
||||
|
||||
|
||||
def get_fetcher_capabilities(watch, datastore):
|
||||
"""Get capability flags for a watch's resolved fetcher.
|
||||
"""Get capability flags for a watch's fetcher.
|
||||
|
||||
Uses the BrowserProfile resolution chain (watch → tag → global → built-in)
|
||||
to determine the actual fetcher class, then reads its capability flags.
|
||||
Args:
|
||||
watch: The watch object/dict
|
||||
datastore: The datastore to resolve 'system' fetcher
|
||||
|
||||
Returns:
|
||||
dict: {'supports_browser_steps': bool, 'supports_screenshots': bool,
|
||||
'supports_xpath_element_data': bool}
|
||||
dict: Dictionary with capability flags:
|
||||
{
|
||||
'supports_browser_steps': bool,
|
||||
'supports_screenshots': bool,
|
||||
'supports_xpath_element_data': bool
|
||||
}
|
||||
"""
|
||||
from changedetectionio.model.browser_profile import resolve_browser_profile
|
||||
# Get the fetcher name from watch
|
||||
fetcher_name = watch.get('fetch_backend', 'system')
|
||||
|
||||
# Resolve 'system' to actual fetcher
|
||||
if fetcher_name == 'system':
|
||||
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
|
||||
|
||||
# Get the fetcher class
|
||||
from changedetectionio import content_fetchers
|
||||
|
||||
profile = resolve_browser_profile(watch, datastore)
|
||||
fetcher_class = content_fetchers.get_fetcher(profile.fetch_backend)
|
||||
# Try to get from built-in fetchers first
|
||||
if hasattr(content_fetchers, fetcher_name):
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name)
|
||||
return {
|
||||
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
|
||||
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
|
||||
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
|
||||
}
|
||||
|
||||
if fetcher_class is None:
|
||||
return {'supports_browser_steps': False, 'supports_screenshots': False, 'supports_xpath_element_data': False}
|
||||
# Try to get from plugin-provided fetchers
|
||||
# Query all plugins for registered fetchers
|
||||
plugin_fetchers = plugin_manager.hook.register_content_fetcher()
|
||||
for fetcher_registration in plugin_fetchers:
|
||||
if fetcher_registration:
|
||||
name, fetcher_class = fetcher_registration
|
||||
if name == fetcher_name:
|
||||
return {
|
||||
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
|
||||
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
|
||||
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
|
||||
}
|
||||
|
||||
# Default: no capabilities
|
||||
return {
|
||||
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
|
||||
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
|
||||
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False),
|
||||
'supports_browser_steps': False,
|
||||
'supports_screenshots': False,
|
||||
'supports_xpath_element_data': False
|
||||
}
|
||||
|
||||
|
||||
@@ -644,20 +606,4 @@ def apply_update_finalize(update_handler, watch, datastore, processing_exception
|
||||
except Exception as e:
|
||||
# Don't let plugin errors crash the worker
|
||||
logger.error(f"Error in update_finalize hook: {e}")
|
||||
logger.exception(f"update_finalize hook exception details:")
|
||||
|
||||
|
||||
def collect_html_head_extras():
|
||||
"""Collect and combine HTML head extras from all plugins.
|
||||
|
||||
Called from a Flask template global so it always runs inside a request context.
|
||||
This means url_for() works correctly in plugin implementations, including when the
|
||||
app is deployed under a sub-path via USE_X_SETTINGS / X-Forwarded-Prefix (ProxyFix
|
||||
sets SCRIPT_NAME so url_for() automatically prepends the prefix).
|
||||
|
||||
Returns:
|
||||
str: Combined HTML string to inject inside <head>, or empty string
|
||||
"""
|
||||
results = plugin_manager.hook.get_html_head_extras()
|
||||
parts = [r for r in results if r]
|
||||
return "\n".join(parts) if parts else ""
|
||||
logger.exception(f"update_finalize hook exception details:")
|
||||
@@ -9,15 +9,6 @@ Some suggestions for the future
|
||||
|
||||
- `graphical`
|
||||
|
||||
## API schema extension (`api.yaml`)
|
||||
|
||||
A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
|
||||
Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
|
||||
making `processor_config_<name>` a valid field on all watch create/update API calls.
|
||||
The fully merged spec is served live at `/api/v1/full-spec`.
|
||||
|
||||
See `restock_diff/api.yaml` for a working example.
|
||||
|
||||
## Todo
|
||||
|
||||
- Make each processor return a extra list of sub-processed (so you could configure a single processor in different ways)
|
||||
|
||||
@@ -341,18 +341,6 @@ def get_processor_descriptions():
|
||||
return descriptions
|
||||
|
||||
|
||||
def wcag_text_color(hex_bg: str) -> str:
|
||||
"""Return #000000 or #ffffff for maximum WCAG contrast against hex_bg."""
|
||||
hex_bg = hex_bg.lstrip('#')
|
||||
if len(hex_bg) != 6:
|
||||
return '#000000'
|
||||
r, g, b = (int(hex_bg[i:i+2], 16) / 255 for i in (0, 2, 4))
|
||||
def lin(c):
|
||||
return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4
|
||||
L = 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b)
|
||||
return '#000000' if L > 0.179 else '#ffffff'
|
||||
|
||||
|
||||
def generate_processor_badge_colors(processor_name):
|
||||
"""
|
||||
Generate consistent colors for a processor badge based on its name.
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
import asyncio
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from changedetectionio.browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
from copy import deepcopy
|
||||
from abc import abstractmethod
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
||||
@@ -23,7 +20,6 @@ class difference_detection_processor():
|
||||
watch = None
|
||||
xpath_data = None
|
||||
preferred_proxy = None
|
||||
preferred_proxy_override = None # Set externally to force a specific proxy (e.g. proxy checker)
|
||||
screenshot_format = SCREENSHOT_FORMAT_JPEG
|
||||
last_raw_content_checksum = None
|
||||
|
||||
@@ -37,8 +33,6 @@ class difference_detection_processor():
|
||||
# 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict()
|
||||
# 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data
|
||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||
if self.watch is None:
|
||||
raise KeyError(f"Watch UUID {watch_uuid} not found in datastore (deleted before processing?)")
|
||||
|
||||
# Generic fetcher that should be extended (requests, playwright etc)
|
||||
self.fetcher = Fetcher()
|
||||
@@ -100,82 +94,81 @@ class difference_detection_processor():
|
||||
logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
|
||||
self.last_raw_content_checksum = None
|
||||
|
||||
async def validate_iana_url(self):
|
||||
"""Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop.
|
||||
Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes
|
||||
through call_browser().
|
||||
"""
|
||||
if strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')):
|
||||
return
|
||||
parsed = urlparse(self.watch.link)
|
||||
if not parsed.hostname:
|
||||
return
|
||||
loop = asyncio.get_running_loop()
|
||||
if await loop.run_in_executor(None, is_private_hostname, parsed.hostname):
|
||||
raise Exception(
|
||||
f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. "
|
||||
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
|
||||
)
|
||||
|
||||
async def call_browser(self):
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from changedetectionio.model.browser_profile import resolve_browser_profile, BUILTIN_REQUESTS
|
||||
|
||||
url = self.watch.link
|
||||
|
||||
# Protect against file:, file:/, file:// access
|
||||
# Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
|
||||
if re.search(r'^file:', url.strip(), re.IGNORECASE):
|
||||
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
||||
raise Exception("file:// type access is denied for security reasons.")
|
||||
raise Exception(
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
await self.validate_iana_url()
|
||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
||||
|
||||
# Resolve the full browser profile for this watch (watch → tag → global → built-in)
|
||||
profile = resolve_browser_profile(self.watch, self.datastore)
|
||||
# Proxy ID "key"
|
||||
preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(
|
||||
uuid=self.watch.get('uuid'))
|
||||
|
||||
# PDFs always use the requests fetcher — browsers render them in an embedded viewer
|
||||
# Pluggable content self.fetcher
|
||||
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
|
||||
prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')
|
||||
|
||||
# In the case that the preferred fetcher was a browser config with custom connection URL..
|
||||
# @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
|
||||
custom_browser_connection_url = None
|
||||
if prefer_fetch_backend.startswith('extra_browser_'):
|
||||
(t, key) = prefer_fetch_backend.split('extra_browser_')
|
||||
connection = list(
|
||||
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
|
||||
if connection:
|
||||
prefer_fetch_backend = 'html_webdriver'
|
||||
custom_browser_connection_url = connection[0].get('browser_connection_url')
|
||||
|
||||
# PDF should be html_requests because playwright will serve it up (so far) in a embedded page
|
||||
# @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
|
||||
# @todo needs test to or a fix
|
||||
if self.watch.is_pdf:
|
||||
profile = BUILTIN_REQUESTS
|
||||
prefer_fetch_backend = "html_requests"
|
||||
|
||||
# Resolve proxy for the target URL fetch.
|
||||
# Note: browser_connection_url is the WebSocket endpoint to reach the remote browser,
|
||||
# which is separate from the proxy used by the browser to fetch target pages.
|
||||
proxy_url = self.datastore.get_proxy_url_for_watch(self.watch.get('uuid'), override_id=self.preferred_proxy_override)
|
||||
if proxy_url:
|
||||
logger.debug(f"Proxy '{proxy_url}' for {url}")
|
||||
|
||||
logger.debug(f"BrowserProfile '{profile.get_machine_name()}' (fetcher={profile.fetch_backend}) for watch {self.watch['uuid']}")
|
||||
|
||||
# Select the fetcher class
|
||||
# Grab the right kind of 'fetcher', (playwright, requests, etc)
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_class_name = profile.get_fetcher_class_name()
|
||||
if hasattr(content_fetchers, prefer_fetch_backend):
|
||||
# @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
|
||||
if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
|
||||
# This is never supported in selenium anyway
|
||||
logger.warning(
|
||||
"Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
|
||||
from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
|
||||
fetcher_obj = playwright_fetcher
|
||||
else:
|
||||
fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
|
||||
else:
|
||||
# What it referenced doesnt exist, Just use a default
|
||||
fetcher_obj = getattr(content_fetchers, "html_requests")
|
||||
|
||||
fetcher_obj = content_fetchers.get_fetcher(fetcher_class_name)
|
||||
if fetcher_obj is None:
|
||||
logger.warning(f"Fetcher '{fetcher_class_name}' not found, falling back to requests")
|
||||
fetcher_obj = content_fetchers.get_fetcher('requests')
|
||||
elif self.watch.has_browser_steps and not getattr(fetcher_obj, 'supports_browser_steps', False):
|
||||
# Browser steps require Playwright — override if the resolved fetcher doesn't support them
|
||||
logger.warning(f"Fetcher '{fetcher_class_name}' does not support browser steps, overriding to Playwright")
|
||||
fetcher_obj = content_fetchers.get_fetcher('playwright')
|
||||
proxy_url = None
|
||||
if preferred_proxy_id:
|
||||
# Custom browser endpoints should NOT have a proxy added
|
||||
if not prefer_fetch_backend.startswith('extra_browser_'):
|
||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
||||
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
||||
else:
|
||||
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
|
||||
|
||||
self.fetcher = fetcher_obj(
|
||||
proxy_override=proxy_url,
|
||||
custom_browser_connection_url=profile.browser_connection_url,
|
||||
screenshot_format=self.screenshot_format,
|
||||
# BrowserProfile fields — browser fetchers use these; html_requests ignores them
|
||||
viewport_width=profile.viewport_width,
|
||||
viewport_height=profile.viewport_height,
|
||||
block_images=profile.block_images,
|
||||
block_fonts=profile.block_fonts,
|
||||
profile_user_agent=profile.user_agent,
|
||||
ignore_https_errors=profile.ignore_https_errors,
|
||||
locale=profile.locale,
|
||||
service_workers=profile.service_workers,
|
||||
extra_delay=profile.extra_delay,
|
||||
)
|
||||
logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")
|
||||
|
||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
||||
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
||||
custom_browser_connection_url=custom_browser_connection_url,
|
||||
screenshot_format=self.screenshot_format
|
||||
)
|
||||
|
||||
if self.watch.has_browser_steps:
|
||||
self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', []))
|
||||
@@ -185,17 +178,9 @@ class difference_detection_processor():
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
request_headers = CaseInsensitiveDict()
|
||||
|
||||
# Browser profile: UA override (lowest priority — watch headers override this)
|
||||
if profile.user_agent:
|
||||
request_headers['User-Agent'] = profile.user_agent
|
||||
|
||||
# Browser profile: custom headers (override profile UA, but watch headers override these)
|
||||
if profile.custom_headers:
|
||||
for line in profile.custom_headers.splitlines():
|
||||
line = line.strip()
|
||||
if not line.startswith('#') and ':' in line:
|
||||
k, v = line.split(':', 1)
|
||||
request_headers[k.strip()] = v.strip()
|
||||
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
||||
if ua and ua.get(prefer_fetch_backend):
|
||||
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
|
||||
|
||||
request_headers.update(self.watch.get('headers', {}))
|
||||
request_headers.update(self.datastore.get_all_base_headers())
|
||||
@@ -252,17 +237,6 @@ class difference_detection_processor():
|
||||
|
||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||
await self.fetcher.quit(watch=self.watch)
|
||||
self.fetcher.disk_cleanup_after_fetch()
|
||||
|
||||
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
|
||||
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
|
||||
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
|
||||
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
|
||||
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
|
||||
if self.fetcher.content and isinstance(self.fetcher.content, str):
|
||||
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
|
||||
@@ -42,7 +42,10 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
||||
# Get error information for the template
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
fetcher_supports_screenshots = watch.fetcher_supports_screenshots
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
@@ -59,7 +62,7 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
screenshot=screenshot_url,
|
||||
fetcher_supports_screenshots=fetcher_supports_screenshots,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
||||
extra_title=f" - {watch.label} - Extract Data",
|
||||
extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
|
||||
|
||||
@@ -100,13 +100,7 @@ class guess_stream_type():
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
||||
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
||||
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
||||
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
||||
self.is_plaintext = True
|
||||
else:
|
||||
self.is_json = True
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
# magic will call a rss document 'xml'
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
|
||||
from babel.numbers import parse_decimal
|
||||
from changedetectionio.model.Watch import model as BaseWatch
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
@@ -11,8 +10,6 @@ supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
|
||||
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
@@ -34,7 +31,6 @@ class Restock(dict):
|
||||
|
||||
if standardized_value:
|
||||
# Convert to float
|
||||
# @todo locale needs to be the locale of the webpage
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
return None
|
||||
@@ -66,22 +62,15 @@ class Restock(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def get_price_from_history_str(history_str):
|
||||
m = _price_re.search(history_str)
|
||||
if not m:
|
||||
return None
|
||||
|
||||
try:
|
||||
return str(Decimal(m.group(1)))
|
||||
except InvalidOperation:
|
||||
return None
|
||||
|
||||
|
||||
class Watch(BaseWatch):
|
||||
def __init__(self, *arg, **kw):
|
||||
super().__init__(*arg, **kw)
|
||||
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
|
||||
|
||||
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing' : 'in_stock_only'
|
||||
} #@todo update
|
||||
|
||||
def clear_watch(self):
|
||||
super().clear_watch()
|
||||
@@ -90,27 +79,13 @@ class Watch(BaseWatch):
|
||||
def extra_notification_token_values(self):
|
||||
values = super().extra_notification_token_values()
|
||||
values['restock'] = self.get('restock', {})
|
||||
|
||||
values['restock']['previous_price'] = None
|
||||
if self.history_n >= 2:
|
||||
history = self.history
|
||||
if history and len(history) >=2:
|
||||
"""Unfortunately for now timestamp is stored as string key"""
|
||||
sorted_keys = sorted(list(history), key=lambda x: int(x))
|
||||
sorted_keys.reverse()
|
||||
|
||||
price_str = self.get_history_snapshot(timestamp=sorted_keys[-1])
|
||||
if price_str:
|
||||
values['restock']['previous_price'] = get_price_from_history_str(price_str)
|
||||
return values
|
||||
|
||||
def extra_notification_token_placeholder_info(self):
|
||||
values = super().extra_notification_token_placeholder_info()
|
||||
|
||||
values.append(('restock.price', "Price detected"))
|
||||
values.append(('restock.in_stock', "In stock status"))
|
||||
values.append(('restock.original_price', "Original price at first check"))
|
||||
values.append(('restock.previous_price', "Previous price in history"))
|
||||
|
||||
return values
|
||||
|
||||
|
||||
@@ -1,149 +0,0 @@
|
||||
components:
|
||||
schemas:
|
||||
processor_config_restock_diff:
|
||||
type: object
|
||||
description: Configuration for the restock_diff processor (restock and price tracking)
|
||||
properties:
|
||||
in_stock_processing:
|
||||
type: string
|
||||
enum: [in_stock_only, all_changes, 'off']
|
||||
default: in_stock_only
|
||||
description: |
|
||||
When to trigger on stock changes:
|
||||
- `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
|
||||
- `all_changes`: Trigger on any availability change
|
||||
- `off`: Disable stock/availability tracking
|
||||
follow_price_changes:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Monitor and track price changes
|
||||
price_change_min:
|
||||
type: [number, 'null']
|
||||
description: Trigger a notification when the price drops below this value
|
||||
price_change_max:
|
||||
type: [number, 'null']
|
||||
description: Trigger a notification when the price rises above this value
|
||||
price_change_threshold_percent:
|
||||
type: [number, 'null']
|
||||
minimum: 0
|
||||
maximum: 100
|
||||
description: Minimum price change percentage since the original price to trigger a notification
|
||||
|
||||
paths:
|
||||
/watch:
|
||||
post:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Restock & price tracking'
|
||||
source: |
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/product",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Restock & price tracking'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
data = {
|
||||
'url': 'https://example.com/product',
|
||||
'processor': 'restock_diff',
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
'follow_price_changes': True,
|
||||
'price_change_threshold_percent': 5,
|
||||
}
|
||||
}
|
||||
response = requests.post('http://localhost:5000/api/v1/watch',
|
||||
headers=headers, json=data)
|
||||
print(response.json())
|
||||
|
||||
/watch/{uuid}:
|
||||
put:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Update restock config'
|
||||
source: |
|
||||
curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": true,
|
||||
"price_change_min": 10.00,
|
||||
"price_change_max": 500.00
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Update restock config'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
uuid = 'YOUR-UUID'
|
||||
data = {
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'all_changes',
|
||||
'follow_price_changes': True,
|
||||
'price_change_min': 10.00,
|
||||
'price_change_max': 500.00,
|
||||
}
|
||||
}
|
||||
response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
|
||||
headers=headers, json=data)
|
||||
print(response.text)
|
||||
|
||||
/tag/{uuid}:
|
||||
put:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Set restock config on group/tag'
|
||||
source: |
|
||||
curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"overrides_watch": true,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 10
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Set restock config on group/tag'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
tag_uuid = 'YOUR-TAG-UUID'
|
||||
data = {
|
||||
'overrides_watch': True,
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
'follow_price_changes': True,
|
||||
'price_change_threshold_percent': 10,
|
||||
}
|
||||
}
|
||||
response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
|
||||
headers=headers, json=data)
|
||||
print(response.text)
|
||||
@@ -31,7 +31,7 @@ class RestockSettingsForm(Form):
|
||||
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
|
||||
processor_config_restock_diff = FormField(RestockSettingsForm)
|
||||
restock_settings = FormField(RestockSettingsForm)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return _l('Restock & Price Detection')
|
||||
@@ -48,34 +48,34 @@ class processor_settings_form(processor_text_json_diff_form):
|
||||
|
||||
output += """
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
<script>
|
||||
<script>
|
||||
$(document).ready(function () {
|
||||
toggleOpacity('#processor_config_restock_diff-follow_price_changes', '.price-change-minmax', true);
|
||||
toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true);
|
||||
});
|
||||
</script>
|
||||
|
||||
<fieldset id="restock-fieldset-price-group">
|
||||
<div class="pure-control-group">
|
||||
<fieldset class="pure-group inline-radio">
|
||||
{{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
|
||||
{{ render_field(form.restock_settings.in_stock_processing) }}
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }}
|
||||
{{ render_checkbox_field(form.restock_settings.follow_price_changes) }}
|
||||
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
{{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
|
||||
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
|
||||
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
|
||||
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
|
||||
</fieldset>
|
||||
</fieldset>
|
||||
</div>
|
||||
</fieldset>
|
||||
"""
|
||||
|
||||
@@ -437,32 +437,26 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||
#useless
|
||||
# from ...html_tools import html_to_text
|
||||
# text = html_to_text(self.fetcher.content)
|
||||
# logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
# if not len(text):
|
||||
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
# raise ReplyWithContentButNoText(url=watch.link,
|
||||
# status_code=self.fetcher.get_last_status_code(),
|
||||
# screenshot=self.fetcher.screenshot,
|
||||
# html_content=self.fetcher.content,
|
||||
# xpath_data=self.fetcher.xpath_data
|
||||
# )
|
||||
from ...html_tools import html_to_text
|
||||
text = html_to_text(self.fetcher.content)
|
||||
logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
if not len(text):
|
||||
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
raise ReplyWithContentButNoText(url=watch.link,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
html_content=self.fetcher.content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# Which restock settings to compare against?
|
||||
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
|
||||
_extra_config = self.get_extra_watch_config('restock_diff.json')
|
||||
restock_settings = _extra_config.get('restock_diff') or {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
}
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
|
||||
# See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('processor_config_restock_diff') or {}
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
|
||||
break
|
||||
|
||||
@@ -489,9 +483,19 @@ class perform_site_check(difference_detection_processor):
|
||||
# @TODO !!! some setting like "Use as fallback" or "always use", "t
|
||||
if not (has_price and has_availability) or True:
|
||||
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
|
||||
# Use the actual resolved fetcher name from the fetcher instance
|
||||
fetcher_name = self.watch.effective_browser_profile.fetch_backend
|
||||
logger.debug(f"Resolved effective fetcher: {fetcher_name}")
|
||||
fetcher_name = watch.get('fetch_backend', 'html_requests')
|
||||
|
||||
# Resolve 'system' to the actual fetcher being used
|
||||
# This allows plugins to work even when watch uses "system settings default"
|
||||
if fetcher_name == 'system':
|
||||
# Get the actual fetcher that was used (from self.fetcher)
|
||||
# Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver')
|
||||
actual_fetcher = type(self.fetcher).__name__
|
||||
if 'html_requests' in actual_fetcher.lower():
|
||||
fetcher_name = 'html_requests'
|
||||
elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower():
|
||||
fetcher_name = 'html_webdriver'
|
||||
logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}")
|
||||
|
||||
# Try plugin override - plugins can decide if they support this fetcher
|
||||
if fetcher_name:
|
||||
|
||||
@@ -283,7 +283,4 @@ def query_price_availability(extracted_data):
|
||||
if not result.get('availability') and 'availability' in microdata:
|
||||
result['availability'] = microdata['availability']
|
||||
|
||||
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
|
||||
# using something like babel you need to know the locale of the website and even then it can be problematic
|
||||
# we dont really do anything with the price data so far.. so just accept it the way it comes.
|
||||
return result
|
||||
|
||||
@@ -36,7 +36,7 @@ def _task(watch, update_handler):
|
||||
|
||||
|
||||
def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
'''Used by @app.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])'''
|
||||
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
|
||||
from changedetectionio import forms, html_tools
|
||||
from changedetectionio.model.Watch import model as watch_model
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
@@ -154,7 +154,11 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
||||
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
fetcher_supports_screenshots = watch.fetcher_supports_screenshots
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
@@ -210,7 +214,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
||||
extra_title=f" - {watch.label} - History",
|
||||
extract_form=extract_form,
|
||||
from_version=str(from_version),
|
||||
fetcher_supports_screenshots=fetcher_supports_screenshots,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
|
||||
@@ -85,10 +85,6 @@ class FilterConfig:
|
||||
self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
|
||||
return self._subtractive_selectors_cache
|
||||
|
||||
@property
|
||||
def extract_lines_containing(self):
|
||||
return self._get_merged_rules('extract_lines_containing')
|
||||
|
||||
@property
|
||||
def extract_text(self):
|
||||
return self._get_merged_rules('extract_text')
|
||||
@@ -105,30 +101,6 @@ class FilterConfig:
|
||||
def text_should_not_be_present(self):
|
||||
return self._get_merged_rules('text_should_not_be_present')
|
||||
|
||||
def get_filter_config_hash(self):
|
||||
"""
|
||||
Stable hash of the effective filter configuration.
|
||||
|
||||
Used by the skip-logic in run_changedetection() so that any change to
|
||||
global settings, tag overrides, or watch filters automatically invalidates
|
||||
the raw-content-unchanged shortcut — without needing scattered
|
||||
clear_all_last_checksums() calls at every settings mutation site.
|
||||
"""
|
||||
app = self.datastore.data['settings']['application']
|
||||
config = {
|
||||
'extract_lines_containing': sorted(self.extract_lines_containing),
|
||||
'extract_text': sorted(self.extract_text),
|
||||
'ignore_text': sorted(self.ignore_text),
|
||||
'include_filters': sorted(self.include_filters),
|
||||
'subtractive_selectors': sorted(self.subtractive_selectors),
|
||||
'text_should_not_be_present': sorted(self.text_should_not_be_present),
|
||||
'trigger_text': sorted(self.trigger_text),
|
||||
# Global processing flags not captured by the filter lists above
|
||||
'ignore_whitespace': app.get('ignore_whitespace', False),
|
||||
'strip_ignored_lines': app.get('strip_ignored_lines', False),
|
||||
}
|
||||
return hashlib.md5(json.dumps(config, sort_keys=True).encode()).hexdigest()
|
||||
|
||||
@property
|
||||
def has_include_filters(self):
|
||||
return bool(self.include_filters) and bool(self.include_filters[0].strip())
|
||||
@@ -163,17 +135,6 @@ class ContentTransformer:
|
||||
text = text.replace("\n\n", "\n")
|
||||
return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
|
||||
|
||||
@staticmethod
|
||||
def extract_lines_containing(text, substrings):
|
||||
"""Keep only lines that contain at least one of the given substrings (case-insensitive)."""
|
||||
needles = [s.lower() for s in substrings if s.strip()]
|
||||
if not needles:
|
||||
return text
|
||||
return '\n'.join(
|
||||
line for line in text.splitlines()
|
||||
if any(needle in line.lower() for needle in needles)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def extract_by_regex(text, regex_patterns):
|
||||
"""Extract text matching regex patterns."""
|
||||
@@ -386,7 +347,6 @@ class ContentProcessor:
|
||||
def extract_text_from_html(self, html_content, stream_content_type):
|
||||
"""Convert HTML to plain text."""
|
||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||
|
||||
return html_tools.html_to_text(
|
||||
html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
@@ -416,26 +376,19 @@ class perform_site_check(difference_detection_processor):
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
current_raw_document_checksum = self.get_raw_document_checksum()
|
||||
|
||||
# Build filter config up front so we can hash it for the skip check.
|
||||
filter_config = FilterConfig(watch, self.datastore)
|
||||
current_filter_config_hash = filter_config.get_filter_config_hash()
|
||||
|
||||
# Skip only when ALL of these hold:
|
||||
# 1. raw HTML is unchanged
|
||||
# 2. watch config was not edited (was_edited covers per-watch field changes)
|
||||
# 3. effective filter config is unchanged (covers global/tag setting changes that
|
||||
# bypass was_edited — e.g. global_ignore_text, global_subtractive_selectors)
|
||||
# last_filter_config_hash being False means first run or upgrade: don't skip.
|
||||
# Skip processing only if BOTH conditions are true:
|
||||
# 1. HTML content unchanged (checksum matches last saved checksum)
|
||||
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
|
||||
# The was_edited flag handles all watch configuration changes, so we don't need
|
||||
# separate checks for trigger_text or other processing rules.
|
||||
if (not force_reprocess and
|
||||
not watch.was_edited and
|
||||
self.last_raw_content_checksum and
|
||||
self.last_raw_content_checksum == current_raw_document_checksum and
|
||||
watch.get('last_filter_config_hash') and
|
||||
watch.get('last_filter_config_hash') == current_filter_config_hash):
|
||||
self.last_raw_content_checksum == current_raw_document_checksum):
|
||||
raise checksumFromPreviousCheckWasTheSame()
|
||||
|
||||
# Initialize remaining components
|
||||
# Initialize components
|
||||
filter_config = FilterConfig(watch, self.datastore)
|
||||
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
|
||||
transformer = ContentTransformer()
|
||||
rule_engine = RuleEngine()
|
||||
@@ -456,7 +409,6 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
# Save the raw content checksum to file (processor implementation detail, not watch config)
|
||||
self.update_last_raw_content_checksum(current_raw_document_checksum)
|
||||
update_obj['last_filter_config_hash'] = current_filter_config_hash
|
||||
|
||||
# === CONTENT PREPROCESSING ===
|
||||
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
|
||||
@@ -550,10 +502,6 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||
|
||||
# === LINE FILTER (plain-text substring) ===
|
||||
if filter_config.extract_lines_containing:
|
||||
stripped_text = transformer.extract_lines_containing(stripped_text, filter_config.extract_lines_containing)
|
||||
|
||||
# === REGEX EXTRACTION ===
|
||||
if filter_config.extract_text:
|
||||
extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
|
||||
@@ -587,8 +535,8 @@ class perform_site_check(difference_detection_processor):
|
||||
# === BLOCKING RULES EVALUATION ===
|
||||
blocked = False
|
||||
|
||||
# Check trigger_text - use text_for_checksuming so ignore_text can suppress trigger_text
|
||||
if rule_engine.evaluate_trigger_text(text_for_checksuming, filter_config.trigger_text):
|
||||
# Check trigger_text
|
||||
if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
|
||||
blocked = True
|
||||
|
||||
# Check text_should_not_be_present
|
||||
|
||||
@@ -29,11 +29,9 @@ def register_watch_operation_handlers(socketio, datastore):
|
||||
# Perform the operation
|
||||
if op == 'pause':
|
||||
watch.toggle_pause()
|
||||
watch.commit()
|
||||
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
||||
elif op == 'mute':
|
||||
watch.toggle_mute()
|
||||
watch.commit()
|
||||
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
||||
elif op == 'recheck':
|
||||
# Import here to avoid circular imports
|
||||
|
||||
@@ -199,31 +199,8 @@ def handle_watch_update(socketio, **kwargs):
|
||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||
|
||||
|
||||
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
|
||||
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
|
||||
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
|
||||
The exception is caught inside run_wsgi and routed to self.server.log() — it never
|
||||
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
|
||||
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
|
||||
disconnect correctly via its own disconnect handler and timeout logic."""
|
||||
try:
|
||||
from werkzeug.serving import BaseWSGIServer
|
||||
_original_log = BaseWSGIServer.log
|
||||
|
||||
def _filtered_log(self, type, message, *args):
|
||||
if type == 'error' and 'write() before start_response' in message:
|
||||
return
|
||||
_original_log(self, type, message, *args)
|
||||
|
||||
BaseWSGIServer.log = _filtered_log
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def init_socketio(app, datastore):
|
||||
"""Initialize SocketIO with the main Flask app"""
|
||||
_suppress_werkzeug_ws_abrupt_disconnect_noise()
|
||||
|
||||
import platform
|
||||
import sys
|
||||
|
||||
@@ -368,4 +345,4 @@ def init_socketio(app, datastore):
|
||||
|
||||
logger.info("Socket.IO initialized and attached to main Flask app")
|
||||
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
|
||||
return socketio
|
||||
return socketio
|
||||
@@ -44,12 +44,12 @@ data_sanity_test () {
|
||||
cd ..
|
||||
TMPDIR=$(mktemp -d)
|
||||
PORT_N=$((5000 + RANDOM % (6501 - 5000)))
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
|
||||
./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
|
||||
PID=$!
|
||||
sleep 5
|
||||
kill $PID
|
||||
sleep 2
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR &
|
||||
./changedetection.py -p $PORT_N -d $TMPDIR &
|
||||
PID=$!
|
||||
sleep 5
|
||||
# On a restart the URL should still be there
|
||||
|
||||
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 10 KiB |
@@ -1,20 +1,5 @@
|
||||
function checkDiscordHtmlWarning() {
|
||||
var urls = $('textarea.notification-urls').val() || '';
|
||||
var format = $('select.notification-format').val() || '';
|
||||
var isDiscord = /discord:\/\/|https:\/\/discord(?:app)?\.com\/api/i.test(urls);
|
||||
var isHtml = format === 'html' || format === 'htmlcolor';
|
||||
if (isDiscord && isHtml) {
|
||||
$('#discord-html-format-warning').show();
|
||||
} else {
|
||||
$('#discord-html-format-warning').hide();
|
||||
}
|
||||
}
|
||||
|
||||
$(document).ready(function () {
|
||||
|
||||
$('textarea.notification-urls, select.notification-format').on('change input', checkDiscordHtmlWarning);
|
||||
checkDiscordHtmlWarning();
|
||||
|
||||
$('#add-email-helper').click(function (e) {
|
||||
e.preventDefault();
|
||||
email = prompt("Destination email");
|
||||
|
||||
@@ -116,14 +116,6 @@ $(document).ready(function () {
|
||||
$('#realtime-conn-error').show();
|
||||
});
|
||||
|
||||
// Tell the server we're leaving cleanly so it can release the connection
|
||||
// immediately rather than waiting for a timeout.
|
||||
// Note: this only fires for voluntary closes (tab/window close, navigation away).
|
||||
// Hard kills, crashes and network drops will still timeout normally on the server.
|
||||
window.addEventListener('beforeunload', function () {
|
||||
socket.disconnect();
|
||||
});
|
||||
|
||||
socket.on('queue_size', function (data) {
|
||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
||||
if(queueSizePagerInfoText) {
|
||||
|
||||
@@ -102,9 +102,7 @@
|
||||
}
|
||||
|
||||
// Navigate to search results (always redirect to watchlist home)
|
||||
// Use base_path if available (for sub-path deployments like /enlighten-richerx)
|
||||
const basePath = typeof base_path !== 'undefined' ? base_path : '';
|
||||
window.location.href = basePath + '/?' + params.toString();
|
||||
window.location.href = '/?' + params.toString();
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
@@ -4,7 +4,7 @@ $(document).ready(function(){
|
||||
});
|
||||
|
||||
var checkUserVal = function(){
|
||||
if($('#fetch_backend input:checked').val()=='requests') {
|
||||
if($('#fetch_backend input:checked').val()=='html_requests') {
|
||||
$('#request-override').show();
|
||||
$('#webdriver-stepper').hide();
|
||||
} else {
|
||||
|
||||
@@ -3,40 +3,21 @@ $(document).ready(function () {
|
||||
// Lazy Hide/Show elements mechanism
|
||||
$('[data-visible-for]').hide();
|
||||
function show_related_elem(e) {
|
||||
var name = $(e).attr('name');
|
||||
var val = $(e).val();
|
||||
var n = name + "=" + val;
|
||||
|
||||
// Resolve browser_profile select → underlying fetch_backend class name
|
||||
// browserProfileFetcherMap is injected by the page as {machine_name: 'playwright', ...}
|
||||
if (name && name.endsWith('browser_profile') && typeof browserProfileFetcherMap !== 'undefined') {
|
||||
var fetcherClass = val === 'system'
|
||||
? (typeof default_system_fetch_backend !== 'undefined' ? default_system_fetch_backend : null)
|
||||
: browserProfileFetcherMap[val];
|
||||
if (fetcherClass) {
|
||||
n = 'fetch_backend=' + fetcherClass;
|
||||
}
|
||||
} else if (n === 'fetch_backend=system') {
|
||||
var n = $(e).attr('name') + "=" + $(e).val();
|
||||
if (n === 'fetch_backend=system') {
|
||||
n = "fetch_backend=" + default_system_fetch_backend;
|
||||
}
|
||||
$(`[data-visible-for~="${n}"]`).show();
|
||||
}
|
||||
|
||||
$('select, :radio').on('change', function (e) {
|
||||
$(`[data-visible-for]`).hide();
|
||||
$('.advanced-options').hide();
|
||||
show_related_elem(this);
|
||||
});
|
||||
// Retain original click/keyup handling for radio buttons
|
||||
$(':radio').on('keyup keypress blur click', function (e) {
|
||||
$(':radio').on('keyup keypress blur change click', function (e) {
|
||||
$(`[data-visible-for]`).hide();
|
||||
$('.advanced-options').hide();
|
||||
show_related_elem(this);
|
||||
});
|
||||
|
||||
$(':radio:checked, select').each(function (e) {
|
||||
$(':radio:checked').each(function (e) {
|
||||
show_related_elem(this);
|
||||
});
|
||||
})
|
||||
|
||||
|
||||
// Show advanced
|
||||
@@ -45,4 +26,4 @@ $(document).ready(function () {
|
||||
$(this).toggle();
|
||||
})
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1 +1 @@
|
||||
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces;overflow-wrap:anywhere}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
|
||||
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
|
||||
|
||||
@@ -62,7 +62,6 @@ body.difference-page {
|
||||
|
||||
pre {
|
||||
white-space: break-spaces;
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -22,8 +22,6 @@ import uuid as uuid_builder
|
||||
from loguru import logger
|
||||
from blinker import signal
|
||||
|
||||
from ..model.Tags import TagsDict
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
@@ -123,11 +121,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
if 'application' in settings_data['settings']:
|
||||
self.__data['settings']['application'].update(settings_data['settings']['application'])
|
||||
|
||||
# Use our Tags dict with cleanup helpers etc
|
||||
# @todo Same for Watches
|
||||
existing_tags = settings_data.get('settings', {}).get('application', {}).get('tags') or {}
|
||||
self.__data['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=self.datastore_path)
|
||||
|
||||
# More or less for the old format which had this data in the one url-watches.json
|
||||
# cant hurt to leave it here,
|
||||
if 'watching' in settings_data:
|
||||
@@ -143,7 +136,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
self.__data['settings']['application']['tags'][uuid] = Tag.model(
|
||||
datastore_path=self.datastore_path,
|
||||
__datastore=self,
|
||||
__datastore=self.__data,
|
||||
default=tag
|
||||
)
|
||||
logger.info(f"Tag: {uuid} {tag['title']}")
|
||||
@@ -203,11 +196,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
self.datastore_path = datastore_path
|
||||
|
||||
# Initialize data structure
|
||||
self.__data = App.model(datastore_path=datastore_path)
|
||||
self.__data = App.model()
|
||||
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
|
||||
|
||||
# Base definition for all watchers (deepcopy part of #569)
|
||||
self.generic_definition = deepcopy(Watch.model(datastore_path=datastore_path, __datastore=self, default={}))
|
||||
self.generic_definition = deepcopy(Watch.model(datastore_path=datastore_path, __datastore=self.__data, default={}))
|
||||
|
||||
# Load build SHA if available (Docker deployments)
|
||||
if path.isfile('changedetectionio/source.txt'):
|
||||
@@ -245,10 +238,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
# Maybe they copied a bunch of watch subdirs across too
|
||||
self._load_state()
|
||||
|
||||
# Apply env-var browser config after state is fully loaded so we can safely
|
||||
# read existing settings without risk of being overwritten.
|
||||
self.preconfigure_browser_profiles_based_on_env()
|
||||
|
||||
def init_fresh_install(self, include_default_watches, version_tag):
|
||||
# Generate app_guid FIRST (required for all operations)
|
||||
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
|
||||
@@ -272,11 +261,13 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
if include_default_watches:
|
||||
self.add_watch(
|
||||
url='https://news.ycombinator.com/',
|
||||
tag='Tech news'
|
||||
tag='Tech news',
|
||||
extras={'fetch_backend': 'html_requests'}
|
||||
)
|
||||
self.add_watch(
|
||||
url='https://changedetection.io/CHANGELOG.txt',
|
||||
tag='changedetection.io'
|
||||
tag='changedetection.io',
|
||||
extras={'fetch_backend': 'html_requests'}
|
||||
)
|
||||
|
||||
# Create changedetection.json immediately
|
||||
@@ -333,64 +324,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
if entity.get('processor') != 'text_json_diff':
|
||||
logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")
|
||||
|
||||
entity = watch_class(datastore_path=self.datastore_path, __datastore=self, default=entity)
|
||||
entity = watch_class(datastore_path=self.datastore_path, __datastore=self.__data, default=entity)
|
||||
return entity
|
||||
|
||||
def preconfigure_browser_profiles_based_on_env(self):
|
||||
"""Instantiate browser profiles from environment variables and store them.
|
||||
|
||||
Always runs at the end of reload_state() — covers fresh installs,
|
||||
existing datastores, and server restarts. Env vars always win so that
|
||||
changing PLAYWRIGHT_DRIVER_URL and restarting is reflected immediately.
|
||||
|
||||
Creates BrowserProfile instances from env vars and stores them in
|
||||
``settings.application.browser_profiles`` under their machine names,
|
||||
then sets ``settings.application.browser_profile`` to that profile as
|
||||
the system-wide default.
|
||||
"""
|
||||
from changedetectionio.model import browser_profile as bp
|
||||
from changedetectionio.strtobool import strtobool
|
||||
|
||||
store_profiles = self.__data['settings']['application'].setdefault('browser_profiles', {})
|
||||
service_workers = os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow')
|
||||
extra_delay = int(os.getenv('WEBDRIVER_DELAY_BEFORE_CONTENT_READY', 0))
|
||||
configured_profile = None
|
||||
|
||||
playwright_url = os.getenv('PLAYWRIGHT_DRIVER_URL')
|
||||
if playwright_url:
|
||||
playwright_url = playwright_url.strip('"')
|
||||
builtin = bp.BUILTIN_PUPPETEER if strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')) else bp.BUILTIN_PLAYWRIGHT
|
||||
profile = bp.BrowserProfile(
|
||||
name=builtin.name,
|
||||
fetch_backend=builtin.fetch_backend,
|
||||
browser_connection_url=playwright_url,
|
||||
service_workers=service_workers,
|
||||
extra_delay=extra_delay,
|
||||
is_builtin=True,
|
||||
)
|
||||
logger.debug(f"Configuring browser profile '{profile.get_machine_name()}' from env")
|
||||
store_profiles[profile.get_machine_name()] = profile.model_dump()
|
||||
configured_profile = profile
|
||||
|
||||
webdriver_url = os.getenv('WEBDRIVER_URL')
|
||||
if webdriver_url:
|
||||
profile = bp.BrowserProfile(
|
||||
name=bp.BUILTIN_SELENIUM.name,
|
||||
fetch_backend=bp.BUILTIN_SELENIUM.fetch_backend,
|
||||
browser_connection_url=webdriver_url.strip('"'),
|
||||
extra_delay=extra_delay,
|
||||
is_builtin=True,
|
||||
)
|
||||
logger.debug(f"Configuring browser profile '{profile.get_machine_name()}' from env")
|
||||
store_profiles[profile.get_machine_name()] = profile.model_dump()
|
||||
if not configured_profile:
|
||||
configured_profile = profile
|
||||
|
||||
if configured_profile:
|
||||
logger.debug(f"Setting system default browser profile to '{configured_profile.get_machine_name()}'")
|
||||
self.__data['settings']['application']['browser_profile'] = configured_profile.get_machine_name()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# FileSavingDataStore Abstract Method Implementations
|
||||
# ============================================================================
|
||||
@@ -419,17 +355,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
# Deep copy settings to avoid modifying the original
|
||||
settings_copy = copy.deepcopy(self.__data['settings'])
|
||||
|
||||
# Is saved as {uuid}/tag.json
|
||||
settings_copy['application']['tags'] = {}
|
||||
|
||||
# Serialize BrowserProfile Pydantic instances to plain dicts for JSON storage
|
||||
raw_profiles = settings_copy['application'].get('browser_profiles', {})
|
||||
from changedetectionio.model.browser_profile import BrowserProfile
|
||||
settings_copy['application']['browser_profiles'] = {
|
||||
k: v.model_dump() if isinstance(v, BrowserProfile) else v
|
||||
for k, v in raw_profiles.items()
|
||||
}
|
||||
|
||||
return {
|
||||
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
|
||||
'app_guid': self.__data.get('app_guid'),
|
||||
@@ -486,7 +411,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
return Tag.model(
|
||||
datastore_path=self.datastore_path,
|
||||
__datastore=self,
|
||||
__datastore=self.__data,
|
||||
default=entity_dict
|
||||
)
|
||||
|
||||
@@ -793,11 +718,8 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
return False
|
||||
|
||||
if not is_safe_valid_url(url):
|
||||
from flask import has_request_context
|
||||
if has_request_context():
|
||||
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
|
||||
else:
|
||||
logger.error(f"add_watch: URL '{url}' is not permitted or invalid, skipping.")
|
||||
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
|
||||
|
||||
return None
|
||||
|
||||
# Check PAGE_WATCH_LIMIT if set
|
||||
@@ -832,7 +754,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
# If the processor also has its own Watch implementation
|
||||
watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
|
||||
new_watch = watch_class(datastore_path=self.datastore_path, __datastore=self, url=url)
|
||||
new_watch = watch_class(datastore_path=self.datastore_path, __datastore=self.__data, url=url)
|
||||
|
||||
new_uuid = new_watch.get('uuid')
|
||||
|
||||
@@ -917,16 +839,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
return proxy_list if len(proxy_list) else None
|
||||
|
||||
def get_proxy_url_for_watch(self, uuid, override_id=None):
|
||||
"""
|
||||
Returns the resolved proxy URL string for a watch, or None.
|
||||
override_id forces a specific proxy (e.g. proxy checker bypass).
|
||||
"""
|
||||
proxy_id = override_id or self.get_preferred_proxy_for_watch(uuid)
|
||||
if proxy_id:
|
||||
return self.proxy_list.get(proxy_id, {}).get('url')
|
||||
return None
|
||||
|
||||
def get_preferred_proxy_for_watch(self, uuid):
|
||||
"""
|
||||
Returns the preferred proxy by ID key
|
||||
@@ -960,71 +872,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BrowserProfile helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_browser_profile(self, machine_name: str):
|
||||
"""Return a BrowserProfile by machine name, or None if not found.
|
||||
|
||||
Built-in profiles (direct_http_requests, browser_chromeplaywright) are
|
||||
always available and checked first.
|
||||
"""
|
||||
from changedetectionio.model.browser_profile import get_profile
|
||||
store_profiles = self.data['settings']['application'].get('browser_profiles', {})
|
||||
return get_profile(machine_name, store_profiles)
|
||||
|
||||
def delete_browser_profile(self, machine_name: str):
|
||||
"""Delete a user-defined BrowserProfile by machine name.
|
||||
|
||||
Rules enforced:
|
||||
- Built-in profiles cannot be deleted.
|
||||
- The profile cannot be the current system default
|
||||
(settings.application.browser_profile); caller must change the
|
||||
default first.
|
||||
- Any watch or tag that referenced this profile is reset to None
|
||||
(falls back through the chain on next fetch).
|
||||
|
||||
Returns the number of watches/tags that were reset.
|
||||
"""
|
||||
from changedetectionio.model.browser_profile import RESERVED_MACHINE_NAMES
|
||||
|
||||
if machine_name in RESERVED_MACHINE_NAMES:
|
||||
raise ValueError(f"Built-in profile '{machine_name}' cannot be deleted")
|
||||
|
||||
system_default = self.data['settings']['application'].get('browser_profile')
|
||||
if system_default == machine_name:
|
||||
raise ValueError(
|
||||
f"Profile '{machine_name}' is the system default. "
|
||||
f"Change the system default before deleting it."
|
||||
)
|
||||
|
||||
store_profiles = self.data['settings']['application'].get('browser_profiles', {})
|
||||
if machine_name not in store_profiles:
|
||||
return 0
|
||||
|
||||
del store_profiles[machine_name]
|
||||
|
||||
reset_count = 0
|
||||
|
||||
# Reset watches that reference this profile
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if watch.get('browser_profile') == machine_name:
|
||||
watch['browser_profile'] = None
|
||||
watch.commit()
|
||||
reset_count += 1
|
||||
|
||||
# Reset tags that reference this profile
|
||||
for tag_uuid, tag in self.data['settings']['application'].get('tags', {}).items():
|
||||
if tag.get('browser_profile') == machine_name:
|
||||
tag['browser_profile'] = None
|
||||
tag.commit()
|
||||
reset_count += 1
|
||||
|
||||
self._save_settings()
|
||||
logger.info(f"Deleted BrowserProfile '{machine_name}', reset {reset_count} watches/tags")
|
||||
return reset_count
|
||||
|
||||
@property
|
||||
def has_extra_headers_file(self):
|
||||
filepath = os.path.join(self.datastore_path, 'headers.txt')
|
||||
@@ -1102,7 +949,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
from ..model import Tag
|
||||
new_tag = Tag.model(
|
||||
datastore_path=self.datastore_path,
|
||||
__datastore=self,
|
||||
__datastore=self.__data,
|
||||
default={
|
||||
'title': title.strip(),
|
||||
'date_created': int(time.time())
|
||||
@@ -1120,20 +967,12 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
def get_all_tags_for_watch(self, uuid):
|
||||
"""This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet"""
|
||||
watch = self.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return {}
|
||||
|
||||
# Start with manually assigned tags
|
||||
result = dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
|
||||
# Should return a dict of full tag info linked by UUID
|
||||
if watch:
|
||||
return dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
|
||||
|
||||
# Additionally include any tag whose url_match_pattern matches this watch's URL
|
||||
watch_url = watch.get('url', '')
|
||||
if watch_url:
|
||||
for tag_uuid, tag in self.__data['settings']['application']['tags'].items():
|
||||
if tag_uuid not in result and tag.matches_url(watch_url):
|
||||
result[tag_uuid] = tag
|
||||
|
||||
return result
|
||||
return {}
|
||||
|
||||
@property
|
||||
def extra_browsers(self):
|
||||
|
||||
@@ -15,7 +15,6 @@ import tarfile
|
||||
import time
|
||||
from loguru import logger
|
||||
from copy import deepcopy
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
@@ -670,9 +669,7 @@ class DatastoreUpdatesMixin:
|
||||
def update_26(self):
|
||||
self.migrate_legacy_db_format()
|
||||
|
||||
# Re-run tag to JSON migration
|
||||
def update_29(self):
|
||||
|
||||
def update_28(self):
|
||||
"""
|
||||
Migrate tags to individual tag.json files.
|
||||
|
||||
@@ -685,6 +682,8 @@ class DatastoreUpdatesMixin:
|
||||
- Enables independent tag versioning/backup
|
||||
- Maintains backwards compatibility (tags stay in settings too)
|
||||
"""
|
||||
# Force save as tag.json (not watch.json) even if object is corrupted
|
||||
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("Running migration: Individual tag persistence (update_28)")
|
||||
logger.critical("Creating individual tag.json files")
|
||||
@@ -703,9 +702,6 @@ class DatastoreUpdatesMixin:
|
||||
failed_count = 0
|
||||
|
||||
for uuid, tag_data in tags.items():
|
||||
if os.path.isfile(os.path.join(self.datastore_path, uuid, "tag.json")):
|
||||
logger.debug(f"Tag {uuid} tag.json exists, skipping")
|
||||
continue
|
||||
try:
|
||||
tag_data.commit()
|
||||
saved_count += 1
|
||||
@@ -727,190 +723,3 @@ class DatastoreUpdatesMixin:
|
||||
logger.info("Future tag edits will update both locations (dual storage)")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
# write it to disk, it will be saved without ['tags'] in the JSON db because we find it from disk glob
|
||||
# (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
|
||||
self._save_settings()
|
||||
|
||||
def update_31(self):
|
||||
"""
|
||||
Migrate legacy ``fetch_backend`` strings to the new ``browser_profile``
|
||||
machine-name system.
|
||||
|
||||
What this migration does
|
||||
------------------------
|
||||
1. ``settings.requests.extra_browsers`` entries are converted into
|
||||
``BrowserProfile`` objects and stored in
|
||||
``settings.application.browser_profiles`` keyed by machine name.
|
||||
|
||||
2. ``settings.application.fetch_backend`` (the system-wide default) is
|
||||
translated to a machine name and written to
|
||||
``settings.application.browser_profile``.
|
||||
|
||||
3. Every watch that has an explicit ``fetch_backend`` (not ``'system'``)
|
||||
gets a corresponding ``browser_profile`` machine name set, then
|
||||
``fetch_backend`` is reset to ``'system'``.
|
||||
|
||||
4. The same translation is applied to tags with ``overrides_watch=True``
|
||||
that carry an explicit ``fetch_backend``.
|
||||
|
||||
Legacy mapping
|
||||
~~~~~~~~~~~~~~
|
||||
* ``'html_requests'`` → built-in ``'direct_http_requests'``
|
||||
* ``'html_webdriver'`` → built-in ``'browser_chromeplaywright'``
|
||||
* ``'extra_browser_<name>'`` → machine name of the migrated custom profile
|
||||
* ``'system'`` / missing → ``None`` (continue to use chain resolution)
|
||||
|
||||
Safe to re-run: skips watches / tags that already have ``browser_profile``
|
||||
set, and skips extra_browser entries that have already been migrated.
|
||||
"""
|
||||
from ..model.browser_profile import (
|
||||
BrowserProfile,
|
||||
BUILTIN_REQUESTS,
|
||||
BUILTIN_BROWSER,
|
||||
)
|
||||
|
||||
app_settings = self.data['settings']['application']
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 1. Migrate extra_browsers → browser_profiles
|
||||
# ------------------------------------------------------------------
|
||||
extra_browsers = self.data['settings']['requests'].get('extra_browsers', [])
|
||||
browser_profiles: dict = app_settings.setdefault('browser_profiles', {})
|
||||
|
||||
extra_browser_name_to_machine: dict[str, str] = {}
|
||||
|
||||
for entry in extra_browsers:
|
||||
browser_name = entry.get('browser_name', '').strip()
|
||||
connection_url = entry.get('browser_connection_url', '').strip()
|
||||
if not browser_name:
|
||||
continue
|
||||
|
||||
profile = BrowserProfile(
|
||||
name=browser_name,
|
||||
fetch_backend='playwright_cdp',
|
||||
browser_connection_url=connection_url or None,
|
||||
)
|
||||
machine_name = profile.get_machine_name()
|
||||
|
||||
if machine_name not in browser_profiles:
|
||||
browser_profiles[machine_name] = profile.model_dump()
|
||||
logger.info(f"update_31: migrated extra_browser '{browser_name}' → profile '{machine_name}'")
|
||||
|
||||
extra_browser_name_to_machine[browser_name] = machine_name
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Helper: translate a fetch_backend string to a machine name
|
||||
# ------------------------------------------------------------------
|
||||
builtin_requests_name = BUILTIN_REQUESTS.get_machine_name()
|
||||
builtin_browser_name = BUILTIN_BROWSER.get_machine_name()
|
||||
|
||||
def _to_machine_name(fetch_backend: str) -> Optional[str]:
|
||||
if not fetch_backend or fetch_backend in ('system', 'default', ''):
|
||||
return None
|
||||
if fetch_backend.startswith('extra_browser_'):
|
||||
key = fetch_backend[len('extra_browser_'):]
|
||||
return extra_browser_name_to_machine.get(key)
|
||||
# Strip legacy html_ prefix then query the fetcher registry
|
||||
from changedetectionio import content_fetchers as cf
|
||||
clean = fetch_backend[5:] if fetch_backend.startswith('html_') else fetch_backend
|
||||
fetcher_cls = cf.get_fetcher(clean)
|
||||
if fetcher_cls is None:
|
||||
logger.warning(f"update_31: unknown fetch_backend value {fetch_backend!r}, skipping")
|
||||
return None
|
||||
if fetcher_cls.supports_screenshots:
|
||||
return builtin_browser_name
|
||||
return builtin_requests_name
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 2. Migrate system-wide default
|
||||
# ------------------------------------------------------------------
|
||||
system_fetch_backend = app_settings.get('fetch_backend', 'requests')
|
||||
if not app_settings.get('browser_profile'):
|
||||
machine = _to_machine_name(system_fetch_backend)
|
||||
app_settings['browser_profile'] = machine
|
||||
logger.info(
|
||||
f"update_31: system fetch_backend '{system_fetch_backend}' → browser_profile '{machine}'"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 3. Migrate watches
|
||||
# ------------------------------------------------------------------
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if watch.get('browser_profile'):
|
||||
continue # already migrated
|
||||
|
||||
fetch_backend = watch.get('fetch_backend', 'system')
|
||||
machine = _to_machine_name(fetch_backend)
|
||||
watch['browser_profile'] = machine
|
||||
watch['fetch_backend'] = 'system' # clear legacy value
|
||||
watch.commit()
|
||||
if machine:
|
||||
logger.info(
|
||||
f"update_31: watch {uuid} fetch_backend '{fetch_backend}' → browser_profile '{machine}'"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 4. Migrate tags
|
||||
# ------------------------------------------------------------------
|
||||
for tag_uuid, tag in app_settings.get('tags', {}).items():
|
||||
if tag.get('browser_profile'):
|
||||
continue # already migrated
|
||||
|
||||
fetch_backend = tag.get('fetch_backend', 'system')
|
||||
machine = _to_machine_name(fetch_backend)
|
||||
if machine:
|
||||
tag['browser_profile'] = machine
|
||||
tag['fetch_backend'] = 'system'
|
||||
tag.commit()
|
||||
logger.info(
|
||||
f"update_31: tag {tag_uuid} fetch_backend '{fetch_backend}' → browser_profile '{machine}'"
|
||||
)
|
||||
|
||||
self._save_settings()
|
||||
logger.success("update_31: fetch_backend → browser_profile migration complete")
|
||||
|
||||
def update_30(self):
|
||||
"""Migrate restock_settings out of watch.json into restock_diff.json processor config file.
|
||||
|
||||
Previously, restock_diff processor settings (in_stock_processing, follow_price_changes, etc.)
|
||||
were stored directly in the watch dict (watch.json). They now belong in a separate per-watch
|
||||
processor config file (restock_diff.json) consistent with the processor_config_* API system.
|
||||
|
||||
For tags: restock_settings key is renamed to processor_config_restock_diff in the tag dict,
|
||||
matching what the API writes when updating a tag.
|
||||
|
||||
Safe to re-run: skips watches that already have a restock_diff.json, skips tags that already
|
||||
have processor_config_restock_diff set.
|
||||
"""
|
||||
import json
|
||||
|
||||
# --- Watches ---
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if watch.get('processor') != 'restock_diff':
|
||||
continue
|
||||
restock_settings = watch.get('restock_settings')
|
||||
if not restock_settings:
|
||||
continue
|
||||
|
||||
data_dir = watch.data_dir
|
||||
if data_dir:
|
||||
watch.ensure_data_dir_exists()
|
||||
filepath = os.path.join(data_dir, 'restock_diff.json')
|
||||
if not os.path.isfile(filepath):
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump({'restock_diff': restock_settings}, f, indent=2)
|
||||
logger.info(f"update_30: migrated restock_settings → {filepath}")
|
||||
|
||||
del self.data['watching'][uuid]['restock_settings']
|
||||
watch.commit()
|
||||
|
||||
# --- Tags ---
|
||||
for tag_uuid, tag in self.data['settings']['application']['tags'].items():
|
||||
restock_settings = tag.get('restock_settings')
|
||||
if not restock_settings or tag.get('processor_config_restock_diff'):
|
||||
continue
|
||||
tag['processor_config_restock_diff'] = restock_settings
|
||||
del tag['restock_settings']
|
||||
tag.commit()
|
||||
logger.info(f"update_30: migrated tag {tag_uuid} restock_settings → processor_config_restock_diff")
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user