Compare commits

..

6 Commits

Author SHA1 Message Date
dgtlmoon
41952157eb FilterNotFoundInResponse 2025-10-15 15:21:46 +02:00
dgtlmoon
274c575f85 Should be single string field 2025-10-15 13:31:32 +02:00
dgtlmoon
5c39668b40 hash change cleanups 2025-10-15 13:27:29 +02:00
dgtlmoon
254ebc90df normalize fix up for unknown $ 2025-10-15 12:52:39 +02:00
dgtlmoon
912ab903c9 Price currency and amount fixes 2025-10-15 12:33:39 +02:00
dgtlmoon
d758afe87e Implement visual selector override of price data Re #3505 2025-10-15 11:59:12 +02:00
206 changed files with 3837 additions and 13046 deletions

View File

@@ -1,51 +0,0 @@
name: 'Extract Memory Test Report'
description: 'Extracts and displays memory test report from a container'
inputs:
container-name:
description: 'Name of the container to extract logs from'
required: true
python-version:
description: 'Python version for artifact naming'
required: true
output-dir:
description: 'Directory to store output logs'
required: false
default: 'output-logs'
runs:
using: "composite"
steps:
- name: Create output directory
shell: bash
run: |
mkdir -p ${{ inputs.output-dir }}
- name: Dump container log
shell: bash
run: |
echo "Disabled for now"
# return
# docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout"
# docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr"
- name: Extract and display memory test report
shell: bash
run: |
echo "Disabled for now"
# echo "Extracting test-memory.log from container..."
# docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container"
#
# echo "=== Top 10 Highest Peak Memory Tests ==="
# if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then
# grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \
# sed 's/.*Peak memory: //' | \
# paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \
# sort -t'|' -k1 -nr | \
# cut -d'|' -f2 | \
# head -10
# echo ""
# echo "=== Full Memory Test Report ==="
# cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log
# else
# echo "No memory log available"
# fi

View File

@@ -27,16 +27,10 @@ RUN \
file \
nodejs \
poppler-utils \
python3 \
glib \
libsm \
libxext \
libxrender \
py3-opencv && \
python3 && \
echo "**** pip3 install test of changedetection.io ****" && \
python3 -m venv /lsiopy && \
pip install -U pip wheel setuptools && \
grep -v "opencv-python-headless" /requirements.txt > /requirements-alpine.txt && \
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements-alpine.txt && \
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \
apk del --purge \
build-dependencies

View File

@@ -30,7 +30,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL

View File

@@ -15,7 +15,6 @@ on:
push:
branches:
- master
- dev
jobs:
metadata:
@@ -40,20 +39,12 @@ jobs:
# Or if we are in a tagged release scenario.
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Set up Python 3.11
uses: actions/setup-python@v6
with:
python-version: 3.11
- name: Cache pip packages
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install dependencies
run: |
python -m pip install --upgrade pip
@@ -93,10 +84,10 @@ jobs:
version: latest
driver-opts: image=moby/buildkit:master
# dev branch -> :dev container tag
# master branch -> :dev container tag
- name: Build and push :dev
id: docker_build
if: ${{ github.ref == 'refs/heads/dev' }}
if: ${{ github.ref }} == "refs/heads/master"
uses: docker/build-push-action@v6
with:
context: ./

View File

@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Set up Python
uses: actions/setup-python@v6
with:
@@ -21,20 +21,20 @@ jobs:
- name: Build a binary wheel and a source tarball
run: python3 -m build
- name: Store the distribution packages
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v4
with:
name: python-package-distributions
path: dist/
test-pypi-package:
name: Test the built package works basically.
name: Test the built 📦 package works basically.
runs-on: ubuntu-latest
needs:
- build
steps:
- name: Download all the dists
uses: actions/download-artifact@v7
uses: actions/download-artifact@v5
with:
name: python-package-distributions
path: dist/
@@ -42,39 +42,18 @@ jobs:
uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Test that the basic pip built package runs without error
run: |
set -ex
ls -alR
# Install the first wheel found in dist/
WHEEL=$(find dist -type f -name "*.whl" -print -quit)
echo Installing $WHEEL
python3 -m pip install --upgrade pip
python3 -m pip install "$WHEEL"
# Find and install the first .whl file
find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
changedetection.io -d /tmp -p 10000 &
sleep 3
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null
# --- API test ---
# This also means that the docs/api-spec.yml was shipped and could be read
test -f /tmp/url-watches.json
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json)
echo Test API KEY is $API_KEY
curl -X POST "http://127.0.0.1:10000/api/v1/watch" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
--show-error --fail \
--retry 6 --retry-delay 1 --retry-connrefused \
-d '{
"url": "https://example.com",
"title": "Example Site Monitor",
"time_between_check": { "hours": 1 }
}'
killall changedetection.io
@@ -93,7 +72,7 @@ jobs:
steps:
- name: Download all the dists
uses: actions/download-artifact@v7
uses: actions/download-artifact@v5
with:
name: python-package-distributions
path: dist/

View File

@@ -44,20 +44,12 @@ jobs:
- platform: linux/arm64
dockerfile: ./.github/test/Dockerfile-alpine
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Set up Python 3.11
uses: actions/setup-python@v6
with:
python-version: 3.11
- name: Cache pip packages
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
# Just test that the build works, some libraries won't compile on ARM/rPi etc
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
@@ -82,5 +74,5 @@ jobs:
file: ${{ matrix.dockerfile }}
platforms: ${{ matrix.platform }}
cache-from: type=gha
cache-to: type=gha,mode=max
cache-to: type=gha,mode=min

View File

@@ -7,7 +7,7 @@ jobs:
lint-code:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Lint with Ruff
run: |
pip install ruff
@@ -21,8 +21,6 @@ jobs:
python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"
test-application-3-10:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
@@ -30,15 +28,12 @@ jobs:
test-application-3-11:
# Always run
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.11'
test-application-3-12:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
@@ -46,8 +41,6 @@ jobs:
skip-pypuppeteer: true
test-application-3-13:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:

View File

@@ -15,294 +15,138 @@ on:
default: false
jobs:
# Build the Docker image once and share it with all test jobs
build:
test-application:
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
# Mainly just for link/flake8
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v6
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Cache pip packages
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-
${{ runner.os }}-pip-
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
run: |
echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
# Build a changedetection.io container and start testing inside
docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
docker run test-changedetectionio bash -c 'pip list'
# Debug info
docker run test-changedetectionio bash -c 'pip list'
- name: We should be Python ${{ env.PYTHON_VERSION }} ...
run: |
docker run test-changedetectionio bash -c 'python3 --version'
- name: Spin up ancillary testable services
run: |
docker run test-changedetectionio bash -c 'python3 --version'
docker network create changedet-network
# Selenium
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
# SocketPuppetBrowser + Extra for custom browser test
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
- name: Save Docker image
- name: Spin up ancillary SMTP+Echo message test server
run: |
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
# Debug SMTP server/echo message back server
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
docker ps
- name: Upload Docker image artifact
uses: actions/upload-artifact@v6
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp/test-changedetectionio.tar
retention-days: 1
# Unit tests (lightweight, no ancillary services needed)
unit-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
- name: Show docker container state and other debug info
run: |
docker load -i /tmp/test-changedetectionio.tar
set -x
echo "Running processes in docker..."
docker ps
- name: Run Unit Tests
run: |
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
# Unit tests
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
# Basic pytest tests with ancillary services
basic-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 25
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
- name: Test built container with Pytest (generally as requests/plaintext fetching)
run: |
docker load -i /tmp/test-changedetectionio.tar
# All tests
echo "run test with pytest"
# The default pytest logger_level is TRACE
# To change logger_level for pytest(test/conftest.py),
# append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG'
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
- name: Test built container with Pytest
# PLAYWRIGHT/NODE-> CDP
- name: Playwright and SocketPuppetBrowser - Specific tests in built container
run: |
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
# Playwright via Sockpuppetbrowser fetch
# tests/visualselector/test_fetch_data.py will do browser steps
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
- name: Extract memory report and logs
if: always()
uses: ./.github/actions/extract-memory-report
with:
container-name: test-cdio-basic-tests
python-version: ${{ env.PYTHON_VERSION }}
- name: Store test artifacts
if: always()
uses: actions/upload-artifact@v6
with:
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
path: output-logs
- name: Playwright and SocketPuppetBrowser - Headers and requests
run: |
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
# Playwright tests
playwright-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Playwright and SocketPuppetBrowser - Restock detection
run: |
# restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
# STRAIGHT TO CDP
- name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
if: ${{ inputs.skip-pypuppeteer == false }}
run: |
docker load -i /tmp/test-changedetectionio.tar
# Playwright via Sockpuppetbrowser fetch
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
- name: Spin up ancillary services
- name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
if: ${{ inputs.skip-pypuppeteer == false }}
run: |
docker network create changedet-network
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Playwright - Specific tests in built container
run: |
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
- name: Playwright - Headers and requests
run: |
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
- name: Playwright - Restock detection
run: |
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
# Pyppeteer tests
pyppeteer-tests:
runs-on: ubuntu-latest
needs: build
if: ${{ inputs.skip-pypuppeteer == false }}
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up ancillary services
run: |
docker network create changedet-network
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
- name: Pyppeteer - Specific tests in built container
run: |
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
- name: Pyppeteer - Headers and requests checks
run: |
docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Pyppeteer - Restock detection
run: |
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
# Selenium tests
selenium-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up ancillary services
run: |
docker network create changedet-network
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
sleep 3
- name: Specific tests for headers and requests checks with Selenium
run: |
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Pyppeteer and SocketPuppetBrowser - Restock detection
if: ${{ inputs.skip-pypuppeteer == false }}
run: |
# restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
# SELENIUM
- name: Specific tests in built container for Selenium
run: |
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
# Selenium fetch
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
# SMTP tests
smtp-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
- name: Specific tests in built container for headers and requests checks with Selenium
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up SMTP test server
run: |
docker network create changedet-network
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
# OTHER STUFF
- name: Test SMTP notification mime types
run: |
# SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above
# "mailserver" hostname defined above
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
# Proxy tests
proxy-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up services
run: |
docker network create changedet-network
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
- name: Test proxy Squid style interaction
# @todo Add a test via playwright/puppeteer
# squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py
- name: Test proxy squid style interaction
run: |
cd changedetectionio
./run_proxy_tests.sh
docker ps
cd ..
- name: Test proxy SOCKS5 style interaction
@@ -311,65 +155,28 @@ jobs:
./run_socks_proxy_tests.sh
cd ..
# Custom browser URL tests
custom-browser-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up ancillary services
run: |
docker network create changedet-network
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
- name: Test custom browser URL
run: |
cd changedetectionio
./run_custom_browser_url_tests.sh
cd ..
# Container startup tests
container-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
- name: Test changedetection.io container starts+runs basically without error
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Test container starts+runs basically without error
run: |
docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
sleep 3
curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
# Should return 0 (no error) when grep finds it
curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
# and IPv6
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
# Check whether TRACE log is enabled.
# Also, check whether TRACE came from STDOUT
docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
# Check whether DEBUG is came from STDOUT
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
docker kill test-changedetectionio
- name: Test HTTPS SSL mode
@@ -377,66 +184,102 @@ jobs:
openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost"
docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio
sleep 3
# Should return 0 (no error) when grep finds it
# -k because its self-signed
curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid
docker kill test-changedetectionio-ssl
- name: Test IPv6 Mode
run: |
# IPv6 - :: bind to all interfaces inside container (like 0.0.0.0), ::1 would be localhost only
docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio
sleep 3
# Should return 0 (no error) when grep finds it on localhost
curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid
docker kill test-changedetectionio-ipv6
# Signal tests
signal-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Test SIGTERM and SIGINT signal shutdown
- name: Test changedetection.io SIGTERM and SIGINT signal shutdown
run: |
echo SIGINT Shutdown request test
docker run --name sig-test -d test-changedetectionio
sleep 3
echo ">>> Sending SIGINT to sig-test container"
docker kill --signal=SIGINT sig-test
sleep 3
# invert the check (it should be not 0/not running)
docker ps
# check signal catch(STDERR) log. Because of
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
test -z "`docker ps|grep sig-test`"
if [ $? -ne 0 ]; then
if [ $? -ne 0 ]
then
echo "Looks like container was running when it shouldnt be"
docker ps
exit 1
fi
# @todo - scan the container log to see the right "graceful shutdown" text exists
docker rm sig-test
echo SIGTERM Shutdown request test
docker run --name sig-test -d test-changedetectionio
sleep 3
echo ">>> Sending SIGTERM to sig-test container"
docker kill --signal=SIGTERM sig-test
sleep 3
# invert the check (it should be not 0/not running)
docker ps
# check signal catch(STDERR) log. Because of
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
test -z "`docker ps|grep sig-test`"
if [ $? -ne 0 ]; then
if [ $? -ne 0 ]
then
echo "Looks like container was running when it shouldnt be"
docker ps
exit 1
fi
# @todo - scan the container log to see the right "graceful shutdown" text exists
docker rm sig-test
- name: Dump container log
if: always()
run: |
mkdir output-logs
docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
- name: Extract and display memory test report
if: always()
run: |
# Extract test-memory.log from the container
echo "Extracting test-memory.log from container..."
docker cp test-cdio-basic-tests:/app/changedetectionio/test-memory.log output-logs/test-memory-${{ env.PYTHON_VERSION }}.log || echo "test-memory.log not found in container"
# Display the memory log contents for immediate visibility in workflow output
echo "=== Top 10 Highest Peak Memory Tests ==="
if [ -f output-logs/test-memory-${{ env.PYTHON_VERSION }}.log ]; then
# Sort by peak memory value (extract number before MB and sort numerically, reverse order)
grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log | \
sed 's/.*Peak memory: //' | \
paste -d'|' - <(grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log) | \
sort -t'|' -k1 -nr | \
cut -d'|' -f2 | \
head -10
echo ""
echo "=== Full Memory Test Report ==="
cat output-logs/test-memory-${{ env.PYTHON_VERSION }}.log
else
echo "No memory log available"
fi
- name: Store everything including test-datastore
if: always()
uses: actions/upload-artifact@v4
with:
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
path: .

1
.gitignore vendored
View File

@@ -21,7 +21,6 @@ venv/
# IDEs
.idea
.vscode/settings.json
*~
# Datastore files
datastore/

View File

@@ -34,27 +34,23 @@ ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
# Additional environment variables for cryptography Rust build
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
pip install \
--prefer-binary \
--extra-index-url https://www.piwheels.org/simple \
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
-r /requirements.txt
RUN --mount=type=cache,target=/tmp/pip-cache \
pip install \
--extra-index-url https://www.piwheels.org/simple \
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
-r /requirements.txt
# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
pip install \
--prefer-binary \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
playwright~=1.56.0 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
RUN --mount=type=cache,target=/tmp/pip-cache \
pip install \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
playwright~=1.48.0 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
# Final image stage
FROM python:${PYTHON_VERSION}-slim-bookworm
@@ -69,11 +65,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# favicon type detection and other uses
file \
zlib1g \
# OpenCV dependencies for image processing
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@@ -1,5 +1,4 @@
recursive-include changedetectionio/api *
include docs/api-spec.yaml
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/conditions *
recursive-include changedetectionio/content_fetchers *

View File

@@ -14,7 +14,7 @@ Ideal for monitoring price changes, content edits, conditional changes and more.
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
@@ -31,7 +31,7 @@ Available when connected to a <a href="https://github.com/dgtlmoon/changedetecti
### Perform interactive browser steps
Fill in text boxes, click buttons and more, setup your changedetection scenario.
Fill in text boxes, click buttons and more, setup your changedetection scenario.
Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches.
@@ -54,7 +54,7 @@ Requires Playwright to be enabled.
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
- COVID related news from government websites
- University/organisation news from their website
- Detect and monitor changes in JSON API responses
- Detect and monitor changes in JSON API responses
- JSON API monitoring and alerting
- Changes in legal and other documents
- Trigger API calls via notifications when text appears on a website
@@ -86,7 +86,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residential, ISP, Rotating and many other proxy types to suit your project.
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
@@ -106,3 +106,4 @@ $ changedetection.io -d /path/to/empty/data/dir -p 5000
Then visit http://127.0.0.1:5000 , You should now be able to access the UI.
See https://changedetection.io for more information.

View File

@@ -64,7 +64,7 @@ def count_words_in_history(watch):
return 0
latest_key = list(watch.history.keys())[-1]
latest_content = watch.get_history_snapshot(timestamp=latest_key)
latest_content = watch.get_history_snapshot(latest_key)
return len(latest_content.split())
except Exception as e:
logger.error(f"Error counting words: {str(e)}")

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.51.4'
__version__ = '0.50.24'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -74,12 +74,6 @@ def main():
datastore_path = None
do_cleanup = False
# Optional URL to watch since start
default_url = None
# Set a default logger level
logger_level = 'DEBUG'
include_default_watches = True
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
port = int(os.environ.get('PORT', 5000))
ssl_mode = False
@@ -93,13 +87,15 @@ def main():
datastore_path = os.path.join(os.getcwd(), "../datastore")
try:
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
sys.exit(2)
create_datastore_dir = False
# Set a default logger level
logger_level = 'DEBUG'
# Set a logger level via shell env variable
# Used: Dockerfile for CICD
# To set logger level for pytest, see the app function in tests/conftest.py
@@ -120,10 +116,6 @@ def main():
if opt == '-d':
datastore_path = arg
if opt == '-u':
default_url = arg
include_default_watches = False
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
@@ -180,20 +172,13 @@ def main():
sys.exit(2)
try:
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__, include_default_watches=include_default_watches)
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
except JSONDecodeError as e:
# Dont' start if the JSON DB looks corrupt
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
logger.critical(str(e))
return
# Inject datastore into plugins that need access to settings
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
inject_datastore_into_plugins(datastore)
if default_url:
datastore.add_watch(url = default_url)
app = changedetection_app(app_config, datastore)
# Get the SocketIO instance from the Flask app (created in flask_app.py)

View File

@@ -1,22 +1,9 @@
import os
from changedetectionio.strtobool import strtobool
from flask_restful import abort, Resource
from flask import request
from functools import wraps
import validators
from . import auth, validate_openapi_request
from ..validate_url import is_safe_valid_url
def default_content_type(content_type='text/plain'):
"""Decorator to set a default Content-Type header if none is provided."""
def decorator(f):
@wraps(f)
def wrapper(*args, **kwargs):
if not request.content_type:
# Set default content type in the request environment
request.environ['CONTENT_TYPE'] = content_type
return f(*args, **kwargs)
return wrapper
return decorator
class Import(Resource):
@@ -25,7 +12,6 @@ class Import(Resource):
self.datastore = kwargs['datastore']
@auth.check_token
@default_content_type('text/plain') #3547 #3542
@validate_openapi_request('importWatches')
def post(self):
"""Import a list of watched URLs."""
@@ -49,13 +35,14 @@ class Import(Resource):
urls = request.get_data().decode('utf8').splitlines()
added = []
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
for url in urls:
url = url.strip()
if not len(url):
continue
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
if not is_safe_valid_url(url):
if not validators.url(url, simple_host=allow_simplehost):
return f"Invalid or unsupported URL - {url}", 400
if dedupe and self.datastore.url_exists(url):

View File

@@ -1,19 +1,17 @@
import os
from changedetectionio.validate_url import is_safe_valid_url
from changedetectionio.strtobool import strtobool
from flask_expects_json import expects_json
from changedetectionio import queuedWatchMetaData, strtobool
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
from flask_restful import abort, Resource
from flask import request, make_response, send_from_directory
import validators
from . import auth
import copy
# Import schemas from __init__.py
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
from ..notification import valid_notification_formats
from ..notification.handler import newline_re
def validate_time_between_check_required(json_data):
@@ -123,10 +121,6 @@ class Watch(Resource):
if validation_error:
return validation_error, 400
# XSS etc protection
if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
return "Invalid URL", 400
watch.update(request.json)
return "OK", 200
@@ -177,120 +171,12 @@ class WatchSingleHistory(Resource):
response = make_response("No content found", 404)
response.mimetype = "text/plain"
else:
content = watch.get_history_snapshot(timestamp=timestamp)
content = watch.get_history_snapshot(timestamp)
response = make_response(content, 200)
response.mimetype = "text/plain"
return response
class WatchHistoryDiff(Resource):
"""
Generate diff between two historical snapshots.
Note: This API endpoint currently returns text-based diffs and works best
with the text_json_diff processor. Future processor types (like image_diff,
restock_diff) may want to implement their own specialized API endpoints
for returning processor-specific data (e.g., price charts, image comparisons).
The web UI diff page (/diff/<uuid>) is processor-aware and delegates rendering
to processors/{type}/difference.py::render() for processor-specific visualizations.
"""
def __init__(self, **kwargs):
# datastore is a black box dependency
self.datastore = kwargs['datastore']
@auth.check_token
@validate_openapi_request('getWatchHistoryDiff')
def get(self, uuid, from_timestamp, to_timestamp):
"""Generate diff between two historical snapshots."""
from changedetectionio import diff
from changedetectionio.notification.handler import apply_service_tweaks
watch = self.datastore.data['watching'].get(uuid)
if not watch:
abort(404, message=f"No watch exists with the UUID of {uuid}")
if not len(watch.history):
abort(404, message=f"Watch found but no history exists for the UUID {uuid}")
history_keys = list(watch.history.keys())
# Handle 'latest' keyword for to_timestamp
if to_timestamp == 'latest':
to_timestamp = history_keys[-1]
# Handle 'previous' keyword for from_timestamp (second-most-recent)
if from_timestamp == 'previous':
if len(history_keys) < 2:
abort(404, message=f"Not enough history entries. Need at least 2 snapshots for 'previous'")
from_timestamp = history_keys[-2]
# Validate timestamps exist
if from_timestamp not in watch.history:
abort(404, message=f"From timestamp {from_timestamp} not found in watch history")
if to_timestamp not in watch.history:
abort(404, message=f"To timestamp {to_timestamp} not found in watch history")
# Get the format parameter (default to 'text')
output_format = request.args.get('format', 'text').lower()
# Validate format
if output_format not in valid_notification_formats.keys():
abort(400, message=f"Invalid format. Must be one of: {', '.join(valid_notification_formats.keys())}")
# Get the word_diff parameter (default to False - line-level mode)
word_diff = strtobool(request.args.get('word_diff', 'false'))
# Get the no_markup parameter (default to False)
no_markup = strtobool(request.args.get('no_markup', 'false'))
# Retrieve snapshot contents
from_version_file_contents = watch.get_history_snapshot(from_timestamp)
to_version_file_contents = watch.get_history_snapshot(to_timestamp)
# Get diff preferences (using defaults similar to the existing code)
diff_prefs = {
'diff_ignoreWhitespace': False,
'diff_changesOnly': True
}
# Generate the diff
content = diff.render_diff(
previous_version_file_contents=from_version_file_contents,
newest_version_file_contents=to_version_file_contents,
ignore_junk=diff_prefs.get('diff_ignoreWhitespace'),
include_equal=not diff_prefs.get('diff_changesOnly'),
word_diff=word_diff,
)
# Skip formatting if no_markup is set
if no_markup:
mimetype = "text/plain"
else:
# Apply formatting based on the requested format
if output_format == 'htmlcolor':
from changedetectionio.notification.handler import apply_html_color_to_body
content = apply_html_color_to_body(n_body=content)
mimetype = "text/html"
else:
# Apply service tweaks for text/html formats
# Pass empty URL and title as they're not used for the placeholder replacement we need
_, content, _ = apply_service_tweaks(
url='',
n_body=content,
n_title='',
requested_output_format=output_format
)
mimetype = "text/html" if output_format == 'html' else "text/plain"
if 'html' in output_format:
content = newline_re.sub('<br>\r\n', content)
response = make_response(content, 200)
response.mimetype = mimetype
return response
class WatchFavicon(Resource):
def __init__(self, **kwargs):
# datastore is a black box dependency
@@ -340,7 +226,9 @@ class CreateWatch(Resource):
json_data = request.get_json()
url = json_data['url'].strip()
if not is_safe_valid_url(url):
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
if not validators.url(url, simple_host=allow_simplehost):
return "Invalid or unsupported URL", 400
if json_data.get('proxy'):

View File

@@ -37,11 +37,7 @@ def get_openapi_spec():
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
if not os.path.exists(spec_path):
# Possibly for pip3 packages
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
with open(spec_path, 'r', encoding='utf-8') as f:
with open(spec_path, 'r') as f:
spec_dict = yaml.safe_load(f)
_openapi_spec = OpenAPI.from_dict(spec_dict)
return _openapi_spec
@@ -51,7 +47,6 @@ def validate_openapi_request(operation_id):
def decorator(f):
@functools.wraps(f)
def wrapper(*args, **kwargs):
from werkzeug.exceptions import BadRequest
try:
# Skip OpenAPI validation for GET requests since they don't have request bodies
if request.method.upper() != 'GET':
@@ -62,6 +57,7 @@ def validate_openapi_request(operation_id):
openapi_request = FlaskOpenAPIRequest(request)
result = spec.unmarshal_request(openapi_request)
if result.errors:
from werkzeug.exceptions import BadRequest
error_details = []
for error in result.errors:
error_details.append(str(error))
@@ -78,7 +74,7 @@ def validate_openapi_request(operation_id):
return decorator
# Import all API resources
from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, WatchFavicon
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon
from .Tags import Tags, Tag
from .Import import Import
from .SystemInfo import SystemInfo

View File

@@ -96,10 +96,7 @@ def build_watch_json_schema(d):
"enum": ["html_requests", "html_webdriver"]
})
schema['properties']['processor'] = {"anyOf": [
{"type": "string", "enum": ["restock_diff", "text_json_diff"]},
{"type": "null"}
]}
# All headers must be key/value type dict
schema['properties']['headers'] = {

View File

@@ -1,5 +1,3 @@
from blinker import signal
from .processors.exceptions import ProcessorException
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
@@ -99,9 +97,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid)
update_signal = signal('watch_small_status_comment')
update_signal.send(watch_uuid=uuid, status="Fetching page..")
# All fetchers are now async, so call directly
await update_handler.call_browser()
@@ -314,7 +309,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
if not datastore.data['watching'].get(uuid):
continue
logger.debug(f"Processing watch UUID: {uuid} - xpath_data length returned {len(update_handler.xpath_data) if update_handler.xpath_data else 'empty.'}")
if process_changedetection_results:
try:
datastore.update_watch(uuid=uuid, update_obj=update_obj)
@@ -359,15 +353,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
count = watch.get('check_count', 0) + 1
# Always record page title (used in notifications, and can change even when the content is the same)
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
try:
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
if page_title:
page_title = page_title.strip()[:2000]
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
except Exception as e:
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
try:
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
except Exception as e:
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
# Record server header
try:

View File

@@ -439,7 +439,7 @@ class browsersteps_live_ui(steppable_browser_interface):
logger.warning("Attempted to get current state after cleanup")
return (None, None)
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding="utf-8")
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
now = time.time()
await self.page.wait_for_timeout(1 * 1000)

View File

@@ -1,27 +1 @@
from copy import deepcopy
from loguru import logger
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from changedetectionio.notification import valid_notification_formats
RSS_CONTENT_FORMAT_DEFAULT = 'text'
# Some stuff not related
RSS_FORMAT_TYPES = deepcopy(valid_notification_formats)
if RSS_FORMAT_TYPES.get('markdown'):
del RSS_FORMAT_TYPES['markdown']
if RSS_FORMAT_TYPES.get(USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH):
del RSS_FORMAT_TYPES[USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH]
if not RSS_FORMAT_TYPES.get(RSS_CONTENT_FORMAT_DEFAULT):
logger.critical(f"RSS_CONTENT_FORMAT_DEFAULT not in the acceptable list {RSS_CONTENT_FORMAT_DEFAULT}")
RSS_TEMPLATE_TYPE_OPTIONS = {'system_default': 'System default', 'notification_body': 'Notification body'}
# @note: We use <pre> because nearly all RSS readers render only HTML (Thunderbird for example cant do just plaintext)
RSS_TEMPLATE_PLAINTEXT_DEFAULT = "<pre>{{watch_label}} had a change.\n\n{{diff}}\n</pre>"
# @todo add some [edit]/[history]/[goto] etc links
# @todo need {{watch_edit_link}} + delete + history link token
RSS_TEMPLATE_HTML_DEFAULT = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_label}}</a></h4>\n<p>{{diff}}</p>\n</body></html>\n"
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]

View File

@@ -1,156 +0,0 @@
"""
Utility functions for RSS feed generation.
"""
from changedetectionio.notification.handler import process_notification
from changedetectionio.notification_service import NotificationContextData, _check_cascading_vars
from loguru import logger
import datetime
import pytz
import re
BAD_CHARS_REGEX = r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
def scan_invalid_chars_in_rss(content):
"""
Scan for invalid characters in RSS content.
Returns True if invalid characters are found.
"""
for match in re.finditer(BAD_CHARS_REGEX, content):
i = match.start()
bad_char = content[i]
hex_value = f"0x{ord(bad_char):02x}"
# Grab context
start = max(0, i - 20)
end = min(len(content), i + 21)
context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
# First match is enough
return True
return False
def clean_entry_content(content):
"""
Remove invalid characters from RSS content.
"""
cleaned = re.sub(BAD_CHARS_REGEX, '', content)
return cleaned
def generate_watch_guid(watch, timestamp):
"""
Generate a unique GUID for a watch RSS entry.
Args:
watch: The watch object
timestamp: The timestamp of the specific change this entry represents
"""
return f"{watch['uuid']}/{timestamp}"
def validate_rss_token(datastore, request):
"""
Validate the RSS access token from the request.
Returns:
tuple: (is_valid, error_response) where error_response is None if valid
"""
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
if rss_url_token != app_rss_token:
return False, ("Access denied, bad token", 403)
return True, None
def get_rss_template(datastore, watch, rss_content_format, default_html, default_plaintext):
"""Get the appropriate template for RSS content."""
if datastore.data['settings']['application'].get('rss_template_type') == 'notification_body':
return _check_cascading_vars(datastore=datastore, var_name='notification_body', watch=watch)
override = datastore.data['settings']['application'].get('rss_template_override')
if override and override.strip():
return override
elif 'text' in rss_content_format:
return default_plaintext
else:
return default_html
def get_watch_label(datastore, watch):
"""Get the label for a watch based on settings."""
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
return watch.label
else:
return watch.get('url')
def add_watch_categories(fe, watch, datastore):
"""Add category tags to a feed entry based on watch tags."""
for tag_uuid in watch.get('tags', []):
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
if tag and tag.get('title'):
fe.category(term=tag.get('title'))
def build_notification_context(watch, timestamp_from, timestamp_to, watch_label,
n_body_template, rss_content_format):
"""Build the notification context object."""
return NotificationContextData(initial_data={
'notification_urls': ['null://just-sending-a-null-test-for-the-render-in-RSS'],
'notification_body': n_body_template,
'timestamp_to': timestamp_to,
'timestamp_from': timestamp_from,
'watch_label': watch_label,
'notification_format': rss_content_format
})
def render_notification(n_object, notification_service, watch, datastore,
date_index_from=None, date_index_to=None):
"""Process and render the notification content."""
kwargs = {'n_object': n_object, 'watch': watch}
if date_index_from is not None and date_index_to is not None:
kwargs['date_index_from'] = date_index_from
kwargs['date_index_to'] = date_index_to
n_object = notification_service.queue_notification_for_watch(**kwargs)
n_object['watch_mime_type'] = None
res = process_notification(n_object=n_object, datastore=datastore)
return res[0]
def populate_feed_entry(fe, watch, content, guid, timestamp, link=None, title_suffix=None):
"""Populate a feed entry with content and metadata."""
watch_label = watch.get('url') # Already determined by caller
# Set link
if link:
fe.link(link=link)
# Set title
if title_suffix:
fe.title(title=f"{watch_label} - {title_suffix}")
else:
fe.title(title=watch_label)
# Clean and set content
if scan_invalid_chars_in_rss(content):
content = clean_entry_content(content)
fe.content(content=content, type='CDATA')
# Set GUID
fe.guid(guid, permalink=False)
# Set pubDate using the timestamp of this specific change
dt = datetime.datetime.fromtimestamp(int(timestamp))
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)

View File

@@ -1,26 +1,150 @@
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.store import ChangeDetectionStore
from flask import Blueprint
from feedgen.feed import FeedGenerator
from flask import Blueprint, make_response, request, url_for, redirect
from loguru import logger
import datetime
import pytz
import re
import time
from . import tag as tag_routes
from . import main_feed
from . import single_watch
BAD_CHARS_REGEX=r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
# Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc)
def scan_invalid_chars_in_rss(content):
for match in re.finditer(BAD_CHARS_REGEX, content):
i = match.start()
bad_char = content[i]
hex_value = f"0x{ord(bad_char):02x}"
# Grab context
start = max(0, i - 20)
end = min(len(content), i + 21)
context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
# First match is enough
return True
return False
def clean_entry_content(content):
cleaned = re.sub(BAD_CHARS_REGEX, '', content)
return cleaned
def construct_blueprint(datastore: ChangeDetectionStore):
"""
Construct and configure the RSS blueprint with all routes.
Args:
datastore: The ChangeDetectionStore instance
Returns:
The configured Flask blueprint
"""
rss_blueprint = Blueprint('rss', __name__)
# Register all route modules
main_feed.construct_main_feed_routes(rss_blueprint, datastore)
single_watch.construct_single_watch_routes(rss_blueprint, datastore)
tag_routes.construct_tag_routes(rss_blueprint, datastore)
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
# to some earlier blueprint rerouting work, it should goto feed.
@rss_blueprint.route("/", methods=['GET'])
def extraslash():
return redirect(url_for('rss.feed'))
# Import the login decorator if needed
# from changedetectionio.auth_decorator import login_optionally_required
@rss_blueprint.route("", methods=['GET'])
def feed():
now = time.time()
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
from changedetectionio import diff
limit_tag = request.args.get('tag', '').lower().strip()
# Be sure limit_tag is a uuid
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
if limit_tag == tag.get('title', '').lower().strip():
limit_tag = uuid
# Sort by last_changed and add the uuid which is usually the key..
sorted_watches = []
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
for uuid, watch in datastore.data['watching'].items():
# @todo tag notification_muted skip also (improve Watch model)
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
continue
if limit_tag and not limit_tag in watch['tags']:
continue
watch['uuid'] = uuid
sorted_watches.append(watch)
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
fg = FeedGenerator()
fg.title('changedetection.io')
fg.description('Feed description')
fg.link(href='https://changedetection.io')
html_colour_enable = False
if datastore.data['settings']['application'].get('rss_content_format') == 'html':
html_colour_enable = True
for watch in sorted_watches:
dates = list(watch.history.keys())
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
if len(dates) < 2:
continue
if not watch.viewed:
# Re #239 - GUID needs to be individual for each event
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
guid = "{}/{}".format(watch['uuid'], watch.last_changed)
fe = fg.add_entry()
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
# Description is the page you watch, link takes you to the diff JS UI page
# Dict val base_url will get overriden with the env var if it is set.
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
# @todo fix
# Because we are called via whatever web server, flask should figure out the right path (
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
fe.link(link=diff_link)
# Same logic as watch-overview.html
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
watch_label = watch.label
else:
watch_label = watch.get('url')
fe.title(title=watch_label)
try:
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
include_equal=False,
line_feed_sep="<br>",
html_colour=html_colour_enable
)
except FileNotFoundError as e:
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
# @todo Make this configurable and also consider html-colored markup
# @todo User could decide if <link> goes to the diff page, or to the watch link
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
# Out of range chars could also break feedgen
if scan_invalid_chars_in_rss(content):
content = clean_entry_content(content)
fe.content(content=content, type='CDATA')
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
return response
return rss_blueprint

View File

@@ -1,105 +0,0 @@
from flask import make_response, request, url_for, redirect
def construct_main_feed_routes(rss_blueprint, datastore):
"""
Construct the main RSS feed routes.
Args:
rss_blueprint: The Flask blueprint to add routes to
datastore: The ChangeDetectionStore instance
"""
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
# to some earlier blueprint rerouting work, it should goto feed.
@rss_blueprint.route("/", methods=['GET'])
def extraslash():
return redirect(url_for('rss.feed'))
# Import the login decorator if needed
# from changedetectionio.auth_decorator import login_optionally_required
@rss_blueprint.route("", methods=['GET'])
def feed():
from feedgen.feed import FeedGenerator
from loguru import logger
import time
from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
from ._util import (validate_rss_token, generate_watch_guid, get_rss_template,
get_watch_label, build_notification_context, render_notification,
populate_feed_entry, add_watch_categories)
from ...notification_service import NotificationService
now = time.time()
# Validate token
is_valid, error = validate_rss_token(datastore, request)
if not is_valid:
return error
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
limit_tag = request.args.get('tag', '').lower().strip()
# Be sure limit_tag is a uuid
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
if limit_tag == tag.get('title', '').lower().strip():
limit_tag = uuid
# Sort by last_changed and add the uuid which is usually the key..
sorted_watches = []
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
for uuid, watch in datastore.data['watching'].items():
# @todo tag notification_muted skip also (improve Watch model)
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
continue
if limit_tag and not limit_tag in watch['tags']:
continue
sorted_watches.append(watch)
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
fg = FeedGenerator()
fg.title('changedetection.io')
fg.description('Feed description')
fg.link(href='https://changedetection.io')
notification_service = NotificationService(datastore=datastore, notification_q=False)
for watch in sorted_watches:
dates = list(watch.history.keys())
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
if len(dates) < 2:
continue
if not watch.viewed:
# Re #239 - GUID needs to be individual for each event
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
watch_label = get_watch_label(datastore, watch)
timestamp_to = dates[-1]
timestamp_from = dates[-2]
guid = generate_watch_guid(watch, timestamp_to)
# Because we are called via whatever web server, flask should figure out the right path
diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=watch['uuid'], _external=True)}
# Get template and build notification context
n_body_template = get_rss_template(datastore, watch, rss_content_format,
RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
n_object = build_notification_context(watch, timestamp_from, timestamp_to,
watch_label, n_body_template, rss_content_format)
# Render notification
res = render_notification(n_object, notification_service, watch, datastore)
# Create and populate feed entry
fe = fg.add_entry()
populate_feed_entry(fe, watch, res['body'], guid, timestamp_to, link=diff_link)
fe.title(title=watch_label) # Override title to not include suffix
add_watch_categories(fe, watch, datastore)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
return response

View File

@@ -1,115 +0,0 @@
def construct_single_watch_routes(rss_blueprint, datastore):
"""
Construct RSS feed routes for single watches.
Args:
rss_blueprint: The Flask blueprint to add routes to
datastore: The ChangeDetectionStore instance
"""
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
def rss_single_watch(uuid):
import time
from flask import make_response, request
from feedgen.feed import FeedGenerator
from loguru import logger
from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
from ._util import (validate_rss_token, get_rss_template, get_watch_label,
build_notification_context, render_notification,
populate_feed_entry, add_watch_categories)
from ...notification_service import NotificationService
"""
Display the most recent changes for a single watch as RSS feed.
Returns RSS XML with multiple entries showing diffs between consecutive snapshots.
The number of entries is controlled by the rss_diff_length setting.
"""
now = time.time()
# Validate token
is_valid, error = validate_rss_token(datastore, request)
if not is_valid:
return error
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
# Get the watch by UUID
watch = datastore.data['watching'].get(uuid)
if not watch:
return f"Watch with UUID {uuid} not found", 404
# Check if watch has at least 2 history snapshots
dates = list(watch.history.keys())
if len(dates) < 2:
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
# Add uuid to watch for proper functioning
watch['uuid'] = uuid
# Get the number of diffs to include (default: 5)
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
# Calculate how many diffs we can actually show (limited by available history)
# We need at least 2 snapshots to create 1 diff
max_possible_diffs = len(dates) - 1
num_diffs = min(rss_diff_length, max_possible_diffs) if rss_diff_length > 0 else max_possible_diffs
# Create RSS feed
fg = FeedGenerator()
# Set title: use "label (url)" if label differs from url, otherwise just url
watch_url = watch.get('url', '')
watch_label = get_watch_label(datastore, watch)
if watch_label != watch_url:
feed_title = f'changedetection.io - {watch_label} ({watch_url})'
else:
feed_title = f'changedetection.io - {watch_url}'
fg.title(feed_title)
fg.description('Changes')
fg.link(href='https://changedetection.io')
# Loop through history and create RSS entries for each diff
# Add entries in reverse order because feedgen reverses them
# This way, the newest change appears first in the final RSS
notification_service = NotificationService(datastore=datastore, notification_q=False)
for i in range(num_diffs - 1, -1, -1):
# Calculate indices for this diff (working backwards from newest)
# i=0: compare dates[-2] to dates[-1] (most recent change)
# i=1: compare dates[-3] to dates[-2] (previous change)
# etc.
date_index_to = -(i + 1)
date_index_from = -(i + 2)
timestamp_to = dates[date_index_to]
timestamp_from = dates[date_index_from]
# Get template and build notification context
n_body_template = get_rss_template(datastore, watch, rss_content_format,
RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
n_object = build_notification_context(watch, timestamp_from, timestamp_to,
watch_label, n_body_template, rss_content_format)
# Render notification with date indices
res = render_notification(n_object, notification_service, watch, datastore,
date_index_from, date_index_to)
# Create and populate feed entry
guid = f"{watch['uuid']}/{timestamp_to}"
fe = fg.add_entry()
title_suffix = f"Change @ {res['original_context']['change_datetime']}"
populate_feed_entry(fe, watch, res.get('body', ''), guid, timestamp_to,
link={'href': watch.get('url')}, title_suffix=title_suffix)
add_watch_categories(fe, watch, datastore)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
logger.debug(f"RSS Single watch built in {time.time()-now:.2f}s")
return response

View File

@@ -1,98 +0,0 @@
def construct_tag_routes(rss_blueprint, datastore):
"""
Construct RSS feed routes for tags.
Args:
rss_blueprint: The Flask blueprint to add routes to
datastore: The ChangeDetectionStore instance
"""
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
def rss_tag_feed(tag_uuid):
from flask import make_response, request, url_for
from feedgen.feed import FeedGenerator
from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
from ._util import (validate_rss_token, generate_watch_guid, get_rss_template,
get_watch_label, build_notification_context, render_notification,
populate_feed_entry, add_watch_categories)
from ...notification_service import NotificationService
"""
Display an RSS feed for all unviewed watches that belong to a specific tag.
Returns RSS XML with entries for each unviewed watch with sufficient history.
"""
# Validate token
is_valid, error = validate_rss_token(datastore, request)
if not is_valid:
return error
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
# Verify tag exists
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
if not tag:
return f"Tag with UUID {tag_uuid} not found", 404
tag_title = tag.get('title', 'Unknown Tag')
# Create RSS feed
fg = FeedGenerator()
fg.title(f'changedetection.io - {tag_title}')
fg.description(f'Changes for watches tagged with {tag_title}')
fg.link(href='https://changedetection.io')
notification_service = NotificationService(datastore=datastore, notification_q=False)
# Find all watches with this tag
for uuid, watch in datastore.data['watching'].items():
#@todo This is wrong, it needs to sort by most recently changed and then limit it datastore.data['watching'].items().sorted(?)
# So get all watches in this tag then sort
# Skip if watch doesn't have this tag
if tag_uuid not in watch.get('tags', []):
continue
# Skip muted watches if configured
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
continue
# Check if watch has at least 2 history snapshots
dates = list(watch.history.keys())
if len(dates) < 2:
continue
# Only include unviewed watches
if not watch.viewed:
# Add uuid to watch for proper functioning
watch['uuid'] = uuid
# Include a link to the diff page
diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=watch['uuid'], _external=True)}
# Get watch label
watch_label = get_watch_label(datastore, watch)
# Get template and build notification context
timestamp_to = dates[-1]
timestamp_from = dates[-2]
# Generate GUID for this entry
guid = generate_watch_guid(watch, timestamp_to)
n_body_template = get_rss_template(datastore, watch, rss_content_format,
RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
n_object = build_notification_context(watch, timestamp_from, timestamp_to,
watch_label, n_body_template, rss_content_format)
# Render notification
res = render_notification(n_object, notification_service, watch, datastore)
# Create and populate feed entry
fe = fg.add_entry()
title_suffix = f"Change @ {res['original_context']['change_datetime']}"
populate_feed_entry(fe, watch, res['body'], guid, timestamp_to, link=diff_link, title_suffix=title_suffix)
add_watch_categories(fe, watch, datastore)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
return response

View File

@@ -17,12 +17,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@login_optionally_required
def settings_page():
from changedetectionio import forms
from changedetectionio.pluggy_interface import (
get_plugin_settings_tabs,
load_plugin_settings,
save_plugin_settings
)
default = deepcopy(datastore.data['settings'])
if datastore.proxy_list is not None:
@@ -108,20 +102,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return redirect(url_for('watchlist.index'))
datastore.needs_write_urgent = True
# Also save plugin settings from the same form submission
plugin_tabs_list = get_plugin_settings_tabs()
for tab in plugin_tabs_list:
plugin_id = tab['plugin_id']
form_class = tab['form_class']
# Instantiate plugin form with POST data
plugin_form = form_class(formdata=request.form)
# Save plugin settings (validation is optional for plugins)
if plugin_form.data:
save_plugin_settings(datastore.datastore_path, plugin_id, plugin_form.data)
flash("Settings updated.")
else:
@@ -130,30 +110,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Convert to ISO 8601 format, all date/time relative events stored as UTC time
utc_time = datetime.now(ZoneInfo("UTC")).isoformat()
# Get active plugins
from changedetectionio.pluggy_interface import get_active_plugins
import sys
active_plugins = get_active_plugins()
python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
# Get plugin settings tabs and instantiate forms
plugin_tabs = get_plugin_settings_tabs()
plugin_forms = {}
for tab in plugin_tabs:
plugin_id = tab['plugin_id']
form_class = tab['form_class']
# Load existing settings
settings = load_plugin_settings(datastore.datastore_path, plugin_id)
# Instantiate the form with existing settings
plugin_forms[plugin_id] = form_class(data=settings)
output = render_template("settings.html",
active_plugins=active_plugins,
api_key=datastore.data['settings']['application'].get('api_access_token'),
python_version=python_version,
available_timezones=sorted(available_timezones()),
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
@@ -163,8 +121,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
settings_application=datastore.data['settings']['application'],
timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
utc_time=utc_time,
plugin_tabs=plugin_tabs,
plugin_forms=plugin_forms,
)
return output

View File

@@ -2,7 +2,7 @@
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %}
{% from '_common_fields.html' import render_common_settings_form, show_token_placeholders %}
{% from '_common_fields.html' import render_common_settings_form %}
<script>
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
{% if emailprefix %}
@@ -24,15 +24,8 @@
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#ui-options">UI Options</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#rss">RSS</a></li>
<li class="tab"><a href="#timedate">Time &amp Date</a></li>
<li class="tab"><a href="#proxies">CAPTCHA &amp; Proxies</a></li>
{% if plugin_tabs %}
{% for tab in plugin_tabs %}
<li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li>
{% endfor %}
{% endif %}
<li class="tab"><a href="#info">Info</a></li>
</ul>
</div>
<div class="box-wrap inner">
@@ -50,6 +43,10 @@
</div>
</div>
</div>
<div class="pure-control-group">
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
@@ -72,13 +69,26 @@
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
<span class="pure-form-message-inline">Allow access to the watch change history page when password is enabled (Good for sharing the diff page)
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
</div>
<div class="grey-form-border">
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.rss_content_format) }}
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
<span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
</div>
</div>
</fieldset>
</div>
@@ -121,10 +131,6 @@
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
</div>
<div class="pure-control-group">
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
</div>
<div class="pure-control-group">
{{ render_field(form.requests.form.timeout) }}
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
@@ -224,37 +230,7 @@ nav
</p>
</div>
</div>
<div class="tab-pane-inner" id="rss">
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.rss_diff_length) }}
<span class="pure-form-message-inline">Maximum number of history snapshots to include in the watch specific RSS feed.</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
<span class="pure-form-message-inline">For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.</span>
</div>
<div class="pure-control-group grey-form-border">
<div class="pure-control-group">
{{ render_field(form.application.form.rss_content_format) }}
<span class="pure-form-message-inline">Does your reader support HTML? Set it here</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.rss_template_type) }}
<span class="pure-form-message-inline">'System default' for the same template for all items, or re-use your "Notification Body" as the template.</span>
</div>
<div>
{{ render_field(form.application.form.rss_template_override) }}
{{ show_token_placeholders(extra_notification_token_placeholder_info=extra_notification_token_placeholder_info, suffix="-rss") }}
</div>
</div>
<br>
</div>
<div class="tab-pane-inner" id="timedate">
<div class="tab-pane-inner" id="timedate">
<div class="pure-control-group">
Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.
</div>
@@ -264,7 +240,9 @@ nav
<p>
{{ render_field(form.application.form.scheduler_timezone_default) }}
<datalist id="timezones" style="display: none;">
{%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%}
{% for tz_name in available_timezones %}
<option value="{{ tz_name }}">{{ tz_name }}</option>
{% endfor %}
</datalist>
</p>
</div>
@@ -358,45 +336,7 @@ nav
</p>
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
</div>
</div>
{% if plugin_tabs %}
{% for tab in plugin_tabs %}
<div class="tab-pane-inner" id="plugin-{{ tab.plugin_id }}">
{% set plugin_form = plugin_forms[tab.plugin_id] %}
{% if tab.template_path %}
{# Plugin provides custom template - include it directly (no separate form) #}
{% include tab.template_path with context %}
{% else %}
{# Default form rendering - fields only, no submit button #}
<fieldset>
{% for field in plugin_form %}
{% if field.type != 'CSRFToken' and field.type != 'SubmitField' %}
<div class="pure-control-group">
{% if field.type == 'BooleanField' %}
{{ render_checkbox_field(field) }}
{% else %}
{{ render_field(field) }}
{% endif %}
</div>
{% endif %}
{% endfor %}
</fieldset>
{% endif %}
</div>
{% endfor %}
{% endif %}
<div class="tab-pane-inner" id="info">
<p><strong>Python version:</strong> {{ python_version }}</p>
<p><strong>Plugins active:</strong></p>
{% if active_plugins %}
<ul>
{% for plugin in active_plugins %}
<li><strong>{{ plugin.name }}</strong> - {{ plugin.description }}</li>
{% endfor %}
</ul>
{% else %}
<p>No plugins active</p>
{% endif %}
</div>
<div id="actions">
<div class="pure-control-group">

View File

@@ -21,10 +21,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
output = render_template("groups-overview.html",
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
available_tags=sorted_tags,
form=add_form,
tag_count=tag_count,
tag_count=tag_count
)
return output
@@ -150,9 +149,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
included_content = template.render(**template_args)
output = render_template("edit-tag.html",
extra_form_content=included_content,
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
settings_application=datastore.data['settings']['application'],
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
extra_form_content=included_content,
**template_args
)

View File

@@ -52,7 +52,6 @@
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">Edit</a>&nbsp;
<a class="pure-button pure-button-primary" href="{{ url_for('tags.delete', uuid=uuid) }}" title="Deletes and removes tag">Delete</a>
<a class="pure-button pure-button-primary" href="{{ url_for('tags.unlink', uuid=uuid) }}" title="Keep the tag but unlink any watches">Unlink</a>
<a href="{{ url_for('rss.rss_tag_feed', tag_uuid=uuid, token=app_rss_token)}}"><img alt="RSS Feed for this watch" style="padding-left: 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
</td>
</tr>
{% endfor %}

View File

@@ -6,7 +6,6 @@ from changedetectionio.store import ChangeDetectionStore
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
from changedetectionio.blueprint.ui import diff, preview
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
from flask import request, flash
@@ -77,14 +76,14 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
elif (op == 'notification-default'):
from changedetectionio.notification import (
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
default_notification_format_for_watch
)
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid]['notification_title'] = None
datastore.data['watching'][uuid]['notification_body'] = None
datastore.data['watching'][uuid]['notification_urls'] = []
datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch
if emit_flash:
flash(f"{len(uuids)} watches set to use default notification settings")
@@ -122,13 +121,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
ui_blueprint.register_blueprint(views_blueprint)
# Register diff and preview blueprints
diff_blueprint = diff.construct_blueprint(datastore)
ui_blueprint.register_blueprint(diff_blueprint)
preview_blueprint = preview.construct_blueprint(datastore)
ui_blueprint.register_blueprint(preview_blueprint)
# Import the login decorator
from changedetectionio.auth_decorator import login_optionally_required
@@ -257,7 +249,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
if i == 0:
flash("No watches available to recheck.")
return redirect(url_for('watchlist.index', **({'tag': tag} if tag else {})))
return redirect(url_for('watchlist.index'))
@ui_blueprint.route("/form/checkbox-operations", methods=['POST'])
@login_optionally_required

View File

@@ -1,243 +0,0 @@
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
import os
import time
import re
import importlib
from loguru import logger
from markupsafe import Markup
from changedetectionio.diff import (
REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
)
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
def construct_blueprint(datastore: ChangeDetectionStore):
diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")
@diff_blueprint.app_template_filter('diff_unescape_difference_spans')
def diff_unescape_difference_spans(content):
"""Emulate Jinja2's auto-escape, then selectively unescape our diff spans."""
from markupsafe import escape
if not content:
return Markup('')
# Step 1: Escape everything like Jinja2 would (this makes it XSS-safe)
escaped_content = escape(str(content))
# Step 2: Unescape only our exact diff spans generated by apply_html_color_to_body()
# Pattern matches the exact structure:
# <span style="{STYLE}" role="{ROLE}" aria-label="{LABEL}" title="{TITLE}">
# Unescape outer span opening tags with full attributes (role, aria-label, title)
# Matches removed/added/changed/changed_into spans
result = re.sub(
rf'&lt;span style=&#34;({re.escape(REMOVED_STYLE)}|{re.escape(ADDED_STYLE)})&#34; '
rf'role=&#34;(deletion|insertion|note)&#34; '
rf'aria-label=&#34;([^&]+?)&#34; '
rf'title=&#34;([^&]+?)&#34;&gt;',
r'<span style="\1" role="\2" aria-label="\3" title="\4">',
str(escaped_content),
flags=re.IGNORECASE
)
# Unescape inner span opening tags (without additional attributes)
# This matches the darker background styles for changed parts within lines
result = re.sub(
rf'&lt;span style=&#34;({re.escape(REMOVED_INNER_STYLE)}|{re.escape(ADDED_INNER_STYLE)})&#34;&gt;',
r'<span style="\1">',
result,
flags=re.IGNORECASE
)
# Unescape closing tags (but only as many as we opened)
open_count = result.count('<span style=')
close_count = str(escaped_content).count('&lt;/span&gt;')
# Replace up to the number of spans we opened
for _ in range(min(open_count, close_count)):
result = result.replace('&lt;/span&gt;', '</span>', 1)
return Markup(result)
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
"""
Render the history/diff page for a watch.
This route is processor-aware: it delegates rendering to the processor's
difference.py module, allowing different processor types to provide
custom visualizations:
- text_json_diff: Text/HTML diff with syntax highlighting
- restock_diff: Could show price charts and stock history
- image_diff: Could show image comparison slider/overlay
Each processor implements processors/{type}/difference.py::render()
If a processor doesn't have a difference module, falls back to text_json_diff.
"""
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
try:
# Try to import the processor's difference module
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
# Call the processor's render() function
if hasattr(processor_module, 'render'):
return processor_module.render(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
# Fallback: if processor doesn't have difference module, use text_json_diff as default
from changedetectionio.processors.text_json_diff.difference import render as default_render
return default_render(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
@login_optionally_required
def diff_history_page_extract_GET(uuid):
"""
Render the data extraction form for a watch.
This route is processor-aware: it delegates to the processor's
extract.py module, allowing different processor types to provide
custom extraction interfaces.
Each processor implements processors/{type}/extract.py::render_form()
If a processor doesn't have an extract module, falls back to text_json_diff.
"""
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
try:
# Try to import the processor's extract module
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
# Call the processor's render_form() function
if hasattr(processor_module, 'render_form'):
return processor_module.render_form(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
# Fallback: if processor doesn't have extract module, use base processors.extract as default
from changedetectionio.processors.extract import render_form as default_render_form
return default_render_form(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
@login_optionally_required
def diff_history_page_extract_POST(uuid):
"""
Process the data extraction request.
This route is processor-aware: it delegates to the processor's
extract.py module, allowing different processor types to provide
custom extraction logic.
Each processor implements processors/{type}/extract.py::process_extraction()
If a processor doesn't have an extract module, falls back to text_json_diff.
"""
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
try:
# Try to import the processor's extract module
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
# Call the processor's process_extraction() function
if hasattr(processor_module, 'process_extraction'):
return processor_module.process_extraction(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
make_response=make_response,
send_from_directory=send_from_directory,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
# Fallback: if processor doesn't have extract module, use base processors.extract as default
from changedetectionio.processors.extract import process_extraction as default_process_extraction
return default_process_extraction(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
make_response=make_response,
send_from_directory=send_from_directory,
flash=flash,
redirect=redirect
)
return diff_blueprint

View File

@@ -206,7 +206,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff':
return redirect(url_for('ui.ui_diff.diff_history_page', uuid=uuid))
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid))
return redirect(url_for('watchlist.index', tag=request.args.get("tag",'')))
@@ -223,25 +223,26 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
watch = datastore.data['watching'].get(uuid)
# if system or watch is configured to need a chrome type browser
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
watch_needs_selenium_or_playwright = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
watch_needs_selenium_or_playwright = True
from zoneinfo import available_timezones
# Only works reliably with Playwright
# Import the global plugin system
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras, get_fetcher_capabilities
# Get fetcher capabilities instead of hardcoded logic
capabilities = get_fetcher_capabilities(watch, datastore)
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
c = [watch.get('processor')]
if worker_handler.is_watch_running(uuid):
c.append('checking-now')
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
template_args = {
'available_processors': processors.available_processors(),
'available_timezones': sorted(available_timezones()),
'browser_steps_config': browser_step_ui_config,
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
'extra_classes': ' '.join(c),
'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
'extra_processor_config': form.extra_tab_content(),
'extra_title': f" - Edit - {watch.label}",
@@ -251,11 +252,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
'jq_support': jq_support,
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
'app_rss_token': app_rss_token,
'rss_uuid_feed' : {
'label': watch.label,
'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token)
},
'settings_application': datastore.data['settings']['application'],
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
@@ -265,7 +261,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid,
'watch': watch,
'capabilities': capabilities
'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright,
}
included_content = None
@@ -339,6 +335,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
s = re.sub(r'[0-9]+', r'\\d+', s)
datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')
return f"<a href={url_for('ui.ui_preview.preview_page', uuid=uuid)}>Click to preview</a>"
return f"<a href={url_for('ui.ui_views.preview_page', uuid=uuid)}>Click to preview</a>"
return edit_blueprint

View File

@@ -2,6 +2,7 @@ from flask import Blueprint, request, make_response
import random
from loguru import logger
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
@@ -14,7 +15,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@notification_blueprint.route("/notification/send-test/", methods=['POST'])
@login_optionally_required
def ajax_callback_send_notification_test(watch_uuid=None):
from changedetectionio.notification_service import NotificationContextData, set_basic_notification_vars
# Watch_uuid could be unset in the case it`s used in tag editor, global settings
import apprise
from changedetectionio.notification.handler import process_notification
@@ -38,7 +39,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)
watch = datastore.data['watching'].get(watch_uuid)
notification_urls = request.form.get('notification_urls','').strip().splitlines()
notification_urls = None
if request.form.get('notification_urls'):
notification_urls = request.form['notification_urls'].strip().splitlines()
if not notification_urls:
logger.debug("Test notification - Trying by group/tag in the edit form if available")
@@ -76,8 +81,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Only use if present, if not set in n_object it should use the default system value
if 'notification_format' in request.form and request.form['notification_format'].strip():
n_object['notification_format'] = request.form.get('notification_format', '').strip()
else:
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
if 'notification_title' in request.form and request.form['notification_title'].strip():
n_object['notification_title'] = request.form.get('notification_title', '').strip()
@@ -94,27 +97,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
n_object['notification_body'] = "Test body"
n_object['as_async'] = False
# Same like in notification service, should be refactored
dates = list(watch.history.keys())
trigger_text = ''
snapshot_contents = ''
# Could be called as a 'test notification' with only 1 snapshot available
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
if len(dates) > 1:
prev_snapshot = watch.get_history_snapshot(timestamp=dates[-2])
current_snapshot = watch.get_history_snapshot(timestamp=dates[-1])
n_object.update(set_basic_notification_vars(current_snapshot=current_snapshot,
prev_snapshot=prev_snapshot,
watch=watch,
triggered_text=trigger_text,
timestamp_changed=dates[-1] if dates else None))
n_object.update(watch.extra_notification_token_values())
sent_obj = process_notification(n_object, datastore)
except Exception as e:

View File

@@ -1,131 +0,0 @@
from flask import Blueprint, request, url_for, flash, render_template, redirect
import time
from loguru import logger
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
from changedetectionio import html_tools
def construct_blueprint(datastore: ChangeDetectionStore):
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
@login_optionally_required
def preview_page(uuid):
"""
Render the preview page for a watch.
This route is processor-aware: it delegates rendering to the processor's
preview.py module, allowing different processor types to provide
custom visualizations:
- text_json_diff: Text preview with syntax highlighting
- image_ssim_diff: Image preview with proper rendering
- restock_diff: Could show latest price/stock data
Each processor implements processors/{type}/preview.py::render()
If a processor doesn't have a preview module, falls back to default text preview.
"""
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
try:
# Try to import the processor's preview module
import importlib
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
# Call the processor's render() function
if hasattr(processor_module, 'render'):
return processor_module.render(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
# Fallback: if processor doesn't have preview module, use default text preview
content = []
versions = []
timestamp = None
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
triggered_line_numbers = []
ignored_line_numbers = []
blocked_line_numbers = []
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
else:
# So prepare the latest preview or not
preferred_version = request.args.get('version')
versions = list(watch.history.keys())
timestamp = versions[-1]
if preferred_version and preferred_version in versions:
timestamp = preferred_version
try:
versions = list(watch.history.keys())
content = watch.get_history_snapshot(timestamp=timestamp)
triggered_line_numbers = html_tools.strip_ignore_text(content=content,
wordlist=watch.get('trigger_text'),
mode='line numbers'
)
ignored_line_numbers = html_tools.strip_ignore_text(content=content,
wordlist=watch.get('ignore_text'),
mode='line numbers'
)
blocked_line_numbers = html_tools.strip_ignore_text(content=content,
wordlist=watch.get("text_should_not_be_present"),
mode='line numbers'
)
except Exception as e:
content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
from changedetectionio.pluggy_interface import get_fetcher_capabilities
capabilities = get_fetcher_capabilities(watch, datastore)
output = render_template("preview.html",
capabilities=capabilities,
content=content,
current_diff_url=watch['url'],
current_version=timestamp,
extra_stylesheets=extra_stylesheets,
extra_title=f" - Diff - {watch.label} @ {timestamp}",
highlight_ignored_line_numbers=ignored_line_numbers,
highlight_triggered_line_numbers=triggered_line_numbers,
highlight_blocked_line_numbers=blocked_line_numbers,
history_n=watch.history_n,
is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_screenshot=watch.get_error_snapshot(),
last_error_text=watch.get_error_text(),
screenshot=watch.get_screenshot(),
uuid=uuid,
versions=versions,
watch=watch,
)
return output
return preview_blueprint

View File

@@ -1,12 +0,0 @@
<ul id="highlightSnippetActions">
<li>
<button class="pure-button pure-button-primary" onclick="diffToJpeg()" title="Share diff as image">Share as Image</button>
</li>
<li>
<a class="pure-button pure-button-primary" data-mode="exact" href="javascript:void(0);">Ignore any lines matching</a>
</li>
<li>
<a class="pure-button pure-button-primary" data-mode="digit-regex" href="javascript:void(0);" >Ignore any lines matching excluding digits</a>
</li>
</ul>

View File

@@ -1,166 +0,0 @@
{% extends 'base.html' %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
{% block content %}
<script>
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
{% if last_error_screenshot %}
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
{% endif %}
const highlight_submit_ignore_url="{{url_for('ui.ui_edit.highlight_submit_ignore_url', uuid=uuid)}}";
const watch_url= {{watch_a.link|tojson}};
// Initial scroll position: if set, scroll to this line number in #difference on page load
const initialScrollToLineNumber = {{ initial_scroll_line_number|default('null') }};
</script>
<script src="https://cdn.jsdelivr.net/npm/html2canvas@1.4.1/dist/html2canvas.min.js"></script>
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}"></script>
<script src="https://cdn.jsdelivr.net/npm/piexifjs@1.0.6/piexif.min.js"></script>
<script src="{{url_for('static_content', group='js', filename='snippet-to-image.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
<div id="settings">
<form class="pure-form " action="{{ url_for("ui.ui_diff.diff_history_page", uuid=uuid) }}" method="GET" id="diff-form">
<fieldset class="diff-fieldset">
{% if versions|length >= 1 %}
<span style="white-space: nowrap;">
<label id="change-from" for="diff-from-version" class="from-to-label">From</label>
<select id="diff-from-version" name="from_version" class="needs-localtime">
{%- for version in versions|reverse -%}
<option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
{{ version }}{#{% if loop.index == 2 %} (Previous){% endif %}#}
</option>
{%- endfor -%}
</select>
</span>
<span style="white-space: nowrap;">
<label id="change-to" for="diff-to-version" class="from-to-label">To</label>
<select id="diff-to-version" name="to_version" class="needs-localtime">
{%- for version in versions|reverse -%}
<option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
{{ version }}{#{% if loop.first %} (Current){% endif %}#}
</option>
{%- endfor -%}
</select>
</span>
{#<button type="submit" class="pure-button pure-button-primary reset-margin">Go</button>#}
{% endif %}
</fieldset>
<fieldset id="diff-style">
<span>
<label for="diffWords" class="pure-checkbox">
<input type="radio" name="type" id="diffWords" value="diffWords" {% if diff_prefs.type == 'diffWords' %}checked=""{% endif %}> Words</label>
</span>
<span>
<label for="diffLines" class="pure-checkbox">
<input type="radio" name="type" id="diffLines" value="diffLines" {% if diff_prefs.type == 'diffLines' %}checked=""{% endif %}> Lines</label>
</span>
<span>
<label for="ignoreWhitespace" class="pure-checkbox" id="label-diff-ignorewhitespace">
<input type="checkbox" id="ignoreWhitespace" name="ignoreWhitespace" {% if diff_prefs.ignoreWhitespace %}checked=""{% endif %}> Ignore Whitespace</label>
</span>
<span>
<label for="changesOnly" class="pure-checkbox" id="label-diff-changes">
<input type="checkbox" id="changesOnly" name="changesOnly" {% if diff_prefs.changesOnly %}checked=""{% endif %}> Same/non-changed</label>
</span>
<span>
<label for="removed" class="pure-checkbox" id="label-diff-removed">
<input type="checkbox" id="removed" name="removed" {% if diff_prefs.removed %}checked=""{% endif %}> Removed</label>
</span>
<span>
<label for="added" class="pure-checkbox" id="label-diff-added">
<input type="checkbox" id="added" name="added" {% if diff_prefs.added %}checked=""{% endif %}> Added</label>
</span>
<span>
<label for="replaced" class="pure-checkbox" id="label-diff-replaced">
<input type="checkbox" id="replaced" name="replaced" {% if diff_prefs.replaced %}checked=""{% endif %}> Replaced</label>
</span>
</fieldset>
{%- if versions|length >= 2 -%}
<div id="keyboard-nav">
<strong>Keyboard: </strong>
<a href="" class="pure-button pure-button-primary" id="btn-previous"> &larr; Previous</a>
&nbsp; <a class="pure-button pure-button-primary" id="btn-next" href=""> &rarr; Next</a>
</div>
{%- endif -%}
</form>
</div>
<div id="diff-jump">
<a id="jump-next-diff" title="Jump to next difference">Jump</a>
</div>
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="tabs">
<ul>
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">Error Screenshot</a></li> {% endif %}
<li class="tab" id=""><a href="#text">Text</a></li>
<li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
<li class="tab" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">Extract Data</a></li>
</ul>
</div>
<div id="diff-ui">
<div class="tab-pane-inner" id="error-text">
<div class="snapshot-age error">{{watch_a.error_text_ctime|format_seconds_ago}} seconds ago.</div>
<pre>
{{ last_error_text }}
</pre>
</div>
<div class="tab-pane-inner" id="error-screenshot">
<div class="snapshot-age error">{{watch_a.snapshot_error_screenshot_ctime|format_seconds_ago}} seconds ago</div>
<img id="error-screenshot-img" style="max-width: 80%" alt="Current error-ing screenshot from most recent request" >
</div>
<div class="tab-pane-inner" id="text">
{%- if (content | default('')).split('\n') | length > 100 -%}
<div id="cell-diff-jump-visualiser" style="user-select: none;">
{%- for cell in diff_cell_grid -%}
<div{% if cell.class %} class="{{ cell.class }}"{% endif %}></div>
{%- endfor -%}
</div>
{%- endif -%}
{%- if password_enabled_and_share_is_off -%}
<div class="tip">Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings.
</div>
{%- endif -%}
<div id="text-diff-heading-area" style="user-select: none;">
<div class="snapshot-age"><span>{{ from_version|format_timestamp_timeago }}</span>
{%- if note -%}<span class="note"><strong>{{ note }}</strong></span>{%- endif -%}
<a href="{{ url_for("ui.ui_preview.preview_page", uuid=uuid) }}">Goto single snapshot</a>
</div>
</div>
<pre id="difference" style="border-left: 2px solid #ddd;">{{ content| diff_unescape_difference_spans }}</pre>
<div id="diff-visualiser-area-after" style="user-select: none;">
<strong>Tip:</strong> Highlight text to share or add to ignore lists.
</div>
</div>
<div class="tab-pane-inner" id="screenshot">
<div class="tip">
For now, Differences are performed on text, not graphically, only the latest screenshot is available.
</div>
{% if is_html_webdriver %}
{% if screenshot %}
<div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request" >
{% else %}
No screenshot available just yet! Try rechecking the page.
{% endif %}
{% else %}
<strong>Screenshot requires Playwright/WebDriver enabled</strong>
{% endif %}
</div>
</div>
<script>
const newest_version_timestamp = {{newest_version_timestamp}};
</script>
<script src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
{% endblock %}

View File

@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -29,7 +29,7 @@
const default_system_fetch_backend="{{ settings_application['fetch_backend'] }}";
</script>
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename=watch['processor']+".js")}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
{% if playwright_enabled %}
@@ -50,8 +50,10 @@
{% endif %}
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
<!-- should goto extra forms? -->
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'restock_diff' %}
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
{% endif %}
{% if watch['processor'] == 'text_json_diff' %}
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
<li class="tab" id="conditions-tab"><a href="#conditions">Conditions</a></li>
{% endif %}
@@ -206,8 +208,9 @@ Math: {{ 1 + 1 }}") }}
</div>
<div class="tab-pane-inner" id="browser-steps">
{% if capabilities.supports_browser_steps %}
{% if visual_selector_data_ready %}
{% if watch_needs_selenium_or_playwright %}
{# Only works with playwright #}
{% if system_has_playwright_configured %}
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
<fieldset>
<div class="pure-control-group">
@@ -247,13 +250,15 @@ Math: {{ 1 + 1 }}") }}
</div>
</fieldset>
{% else %}
<strong>Visual Selector data is not ready, watch needs to be checked atleast once.</strong>
{# it's configured to use selenium or chrome but system says its not configured #}
{{ playwright_warning() }}
{% if system_has_webdriver_configured %}
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
{% endif %}
{% endif %}
{% else %}
<p>
<strong>Sorry, this functionality only works with fetchers that support interactive Javascript (so far only Playwright based fetchers)<br>
You need to <a href="#request">Set the fetch method</a> to one that supports interactive Javascript.</strong>
</p>
{# "This functionality needs chrome.." #}
{{ only_playwright_type_watches_warning() }}
{% endif %}
</div>
@@ -263,7 +268,7 @@ Math: {{ 1 + 1 }}") }}
<div class="pure-control-group inline-radio">
{{ render_ternary_field(form.notification_muted, BooleanField=true) }}
</div>
{% if capabilities.supports_screenshots %}
{% if watch_needs_selenium_or_playwright %}
<div class="pure-control-group inline-radio">
{{ render_checkbox_field(form.notification_screenshot) }}
<span class="pure-form-message-inline">
@@ -284,7 +289,7 @@ Math: {{ 1 + 1 }}") }}
</fieldset>
</div>
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
{% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner" id="conditions">
<script>
@@ -348,22 +353,21 @@ Math: {{ 1 + 1 }}") }}
</div>
</div>
<div id="text-preview" style="display: none;" >
<script>
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
</script>
<br>
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
<div class="minitabs-wrapper">
<div class="minitabs-content">
<div id="text-preview-inner" class="monospace-preview">
<p>Loading...</p>
<script>
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
</script>
<br>
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
<div class="minitabs-wrapper">
<div class="minitabs-content">
<div id="text-preview-inner" class="monospace-preview">
<p>Loading...</p>
</div>
<div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
<p>Loading...</p>
</div>
</div>
<div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
<p>Loading...</p>
</div>
</div>
</div>
{{ highlight_trigger_ignored_explainer() }}
</div>
</div>
</div>
</div>
@@ -375,39 +379,41 @@ Math: {{ 1 + 1 }}") }}
{{ extra_form_content|safe }}
</div>
{% endif %}
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'restock_diff' %}
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
<fieldset>
<div class="pure-control-group">
{% if capabilities.supports_screenshots and capabilities.supports_xpath_element_data %}
{% if visual_selector_data_ready %}
<span class="pure-form-message-inline" id="visual-selector-heading">
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
</span>
{% if watch_needs_selenium_or_playwright %}
{% if system_has_playwright_configured %}
<span class="pure-form-message-inline" id="visual-selector-heading">
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
</span>
<div id="selector-header">
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
<i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
</div>
<div id="selector-wrapper" style="display: none">
<!-- request the screenshot and get the element offset info ready -->
<!-- use img src ready load to know everything is ready to map out -->
<!-- @todo: maybe something interesting like a field to select 'elements that contain text... and their parents n' -->
<img id="selector-background" >
<canvas id="selector-canvas"></canvas>
</div>
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div>
{% else %}
<strong>Visual Selector data is not ready, watch needs to be checked atleast once.</strong>
{% endif %}
<div id="selector-header">
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
<i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
</div>
<div id="selector-wrapper" style="display: none">
<!-- request the screenshot and get the element offset info ready -->
<!-- use img src ready load to know everything is ready to map out -->
<!-- @todo: maybe something interesting like a field to select 'elements that contain text... and their parents n' -->
<img id="selector-background" >
<canvas id="selector-canvas"></canvas>
</div>
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div>
{% else %}
{# The watch needed chrome but system says that playwright is not ready #}
{{ playwright_warning() }}
{% endif %}
{% if system_has_webdriver_configured %}
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
{% endif %}
{% else %}
<p>
<strong>Sorry, this functionality only works with fetchers that support Javascript and screenshots (such as playwright etc).<br>
You need to <a href="#request">Set the fetch method</a> to one that supports Javascript and screenshots.</strong>
</p>
{# "This functionality needs chrome.." #}
{{ only_playwright_type_watches_warning() }}
{% endif %}
</div>
</fieldset>
@@ -472,7 +478,6 @@ Math: {{ 1 + 1 }}") }}
class="pure-button button-error">Clear History</a>{% endif %}
<a href="{{url_for('ui.form_clone', uuid=uuid)}}"
class="pure-button">Clone &amp; Edit</a>
<a href="{{ url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token)}}"><img alt="RSS Feed for this watch" style="padding: .5em 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
</div>
</div>
</form>

View File

@@ -1,11 +1,207 @@
from flask import Blueprint, request, redirect, url_for, flash
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
import os
import time
from loguru import logger
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
from changedetectionio import html_tools
from changedetectionio import worker_handler
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
@views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
@login_optionally_required
def preview_page(uuid):
content = []
versions = []
timestamp = None
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
triggered_line_numbers = []
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
else:
# So prepare the latest preview or not
preferred_version = request.args.get('version')
versions = list(watch.history.keys())
timestamp = versions[-1]
if preferred_version and preferred_version in versions:
timestamp = preferred_version
try:
versions = list(watch.history.keys())
content = watch.get_history_snapshot(timestamp)
triggered_line_numbers = html_tools.strip_ignore_text(content=content,
wordlist=watch['trigger_text'],
mode='line numbers'
)
except Exception as e:
content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
output = render_template("preview.html",
content=content,
current_version=timestamp,
history_n=watch.history_n,
extra_stylesheets=extra_stylesheets,
extra_title=f" - Diff - {watch.label} @ {timestamp}",
triggered_line_numbers=triggered_line_numbers,
current_diff_url=watch['url'],
screenshot=watch.get_screenshot(),
watch=watch,
uuid=uuid,
is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_text=watch.get_error_text(),
last_error_screenshot=watch.get_error_snapshot(),
versions=versions
)
return output
@views_blueprint.route("/diff/<string:uuid>", methods=['POST'])
@login_optionally_required
def diff_history_page_build_report(uuid):
from changedetectionio import forms
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# For submission of requesting an extract
extract_form = forms.extractDataForm(formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
if not extract_form.validate():
flash("An error occurred, please see below.", "error")
return _render_diff_template(uuid, extract_form)
else:
extract_regex = request.form.get('extract_regex', '').strip()
output = watch.extract_regex_from_all_history(extract_regex)
if output:
watch_dir = os.path.join(datastore.datastore_path, uuid)
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
response.headers['Content-type'] = 'text/csv'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = "0"
return response
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')
def _render_diff_template(uuid, extract_form=None):
"""Helper function to render the diff template with all required data"""
from changedetectionio import forms
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Use provided form or create a new one
if extract_form is None:
extract_form = forms.extractDataForm(formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
history = watch.history
dates = list(history.keys())
# If a "from_version" was requested, then find it (or the closest one)
# Also set "from version" to be the closest version to the one that was last viewed.
best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
from_version = request.args.get('from_version', from_version_timestamp )
# Use the current one if nothing was specified
to_version = request.args.get('to_version', str(dates[-1]))
try:
to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
except Exception as e:
logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
try:
from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
except Exception as e:
logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
from_version_file_contents = f"Unable to read to-version {from_version}.\n"
screenshot_url = watch.get_screenshot()
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
datastore.set_last_viewed(uuid, time.time())
return render_template("diff.html",
current_diff_url=watch['url'],
from_version=str(from_version),
to_version=str(to_version),
extra_stylesheets=extra_stylesheets,
extra_title=f" - Diff - {watch.label}",
extract_form=extract_form,
is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_screenshot=watch.get_error_snapshot(),
last_error_text=watch.get_error_text(),
left_sticky=True,
newest=to_version_file_contents,
newest_version_timestamp=dates[-1],
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
from_version_file_contents=from_version_file_contents,
to_version_file_contents=to_version_file_contents,
screenshot=screenshot_url,
uuid=uuid,
versions=dates, # All except current/last
watch_a=watch
)
@views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
return _render_diff_template(uuid)
@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required

View File

@@ -167,7 +167,7 @@ document.addEventListener('DOMContentLoaded', function() {
{% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
</span>
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
{%- if watch['processor'] == 'text_json_diff' -%}
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
@@ -182,9 +182,11 @@ document.addEventListener('DOMContentLoaded', function() {
</div>
<div class="status-icons">
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
{{ effective_fetcher|fetcher_status_icons }}
{%- if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
or "extra_browser_" in watch.get_fetch_backend
-%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
{%- endif -%}
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
@@ -205,7 +207,7 @@ document.addEventListener('DOMContentLoaded', function() {
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
{%- if watch['restock']['price'] != None -%}
<span class="restock-label price" title="Price">
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
</span>
{%- endif -%}
{%- elif not watch.has_restock_info -%}
@@ -217,7 +219,7 @@ document.addEventListener('DOMContentLoaded', function() {
{#last_checked becomes fetch-start-time#}
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
<div class="spinner-wrapper" style="display:none;" >
<span class="spinner"></span><span class="status-text">&nbsp;Checking now</span>
<span class="spinner"></span><span>&nbsp;Checking now</span>
</div>
<span class="innertext">{{watch|format_last_checked_time|safe}}</span>
</td>
@@ -233,8 +235,8 @@ document.addEventListener('DOMContentLoaded', function() {
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a>
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a>
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
<a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
<a href="{{ url_for('ui.ui_preview.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
</div>
</td>
</tr>

View File

@@ -1,7 +1,3 @@
"""
Levenshtein distance and similarity plugin for text change detection.
Provides metrics for measuring text similarity between snapshots.
"""
import pluggy
from loguru import logger

View File

@@ -1,7 +1,3 @@
"""
Word count plugin for content analysis.
Provides word count metrics for snapshot content.
"""
import pluggy
from loguru import logger
@@ -18,7 +14,7 @@ def count_words_in_history(watch, incoming_text=None):
elif watch.history.keys():
# When called from UI extras to count latest snapshot
latest_key = list(watch.history.keys())[-1]
latest_content = watch.get_history_snapshot(timestamp=latest_key)
latest_content = watch.get_history_snapshot(latest_key)
return len(latest_content.split())
return 0
except Exception as e:

View File

@@ -7,9 +7,6 @@ import os
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
# Import hookimpl from centralized pluggy interface
from changedetectionio.pluggy_interface import hookimpl
SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
SCREENSHOT_DEFAULT_QUALITY = 40
@@ -21,9 +18,7 @@ SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_
# The size at which we will switch to stitching method, when below this (and
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
# screenshot method.
# Increased from 8000 to 10000 for better performance (fewer chunks = faster)
# Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe
SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000))
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
# available_fetchers() will scan this implementation looking for anything starting with html_
# this information is used in the form selections
@@ -40,54 +35,17 @@ def available_fetchers():
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
import inspect
p = []
# Get built-in fetchers (but skip plugin fetchers that were added via setattr)
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
if inspect.isclass(obj):
# @todo html_ is maybe better as fetcher_ or something
# In this case, make sure to edit the default one in store.py and fetch_site_status.py
if name.startswith('html_'):
# Skip plugin fetchers that were already registered
if name not in _plugin_fetchers:
t = tuple([name, obj.fetcher_description])
p.append(t)
# Get plugin fetchers from cache (already loaded at module init)
for name, fetcher_class in _plugin_fetchers.items():
if hasattr(fetcher_class, 'fetcher_description'):
t = tuple([name, fetcher_class.fetcher_description])
p.append(t)
else:
logger.warning(f"Plugin fetcher '{name}' does not have fetcher_description attribute")
t = tuple([name, obj.fetcher_description])
p.append(t)
return p
def get_plugin_fetchers():
"""Load and return all plugin fetchers from the centralized plugin manager."""
from changedetectionio.pluggy_interface import plugin_manager
fetchers = {}
try:
# Call the register_content_fetcher hook from all registered plugins
results = plugin_manager.hook.register_content_fetcher()
for result in results:
if result:
name, fetcher_class = result
fetchers[name] = fetcher_class
# Register in current module so hasattr() checks work
setattr(sys.modules[__name__], name, fetcher_class)
logger.info(f"Registered plugin fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', 'No description')}")
except Exception as e:
logger.error(f"Error loading plugin fetchers: {e}")
return fetchers
# Initialize plugins at module load time
_plugin_fetchers = get_plugin_fetchers()
# Decide which is the 'real' HTML webdriver, this is more a system wide config
# rather than site-specific.
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
@@ -104,8 +62,3 @@ else:
logger.debug("Falling back to selenium as fetcher")
from .webdriver_selenium import fetcher as html_webdriver
# Register built-in fetchers as plugins after all imports are complete
from changedetectionio.pluggy_interface import register_builtin_fetchers
register_builtin_fetchers()

View File

@@ -64,30 +64,6 @@ class Fetcher():
# Time ONTOP of the system defined env minimum time
render_extract_delay = 0
# Fetcher capability flags - subclasses should override these
# These indicate what features the fetcher supports
supports_browser_steps = False # Can execute browser automation steps
supports_screenshots = False # Can capture page screenshots
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
@classmethod
def get_status_icon_data(cls):
"""Return data for status icon to display in the watch overview.
This method can be overridden by subclasses to provide custom status icons.
Returns:
dict or None: Dictionary with icon data:
{
'filename': 'icon-name.svg', # Icon filename
'alt': 'Alt text', # Alt attribute
'title': 'Tooltip text', # Title attribute
'style': 'height: 1em;' # Optional inline CSS
}
Or None if no icon
"""
return None
def clear_content(self):
"""
Explicitly clear all content from memory to free up heap space.
@@ -99,6 +75,7 @@ class Fetcher():
self.screenshot = None
self.xpath_data = None
# Keep headers and status_code as they're small
logger.trace("Fetcher content cleared from memory")
@abstractmethod
def get_error(self):
@@ -116,7 +93,6 @@ class Fetcher():
request_method=None,
timeout=None,
url=None,
watch_uuid=None,
):
# Should set self.error, self.status_code and self.content
pass

View File

@@ -36,48 +36,35 @@ async def capture_full_page_async(page):
# Capture screenshots in chunks up to the max total height
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
# Only scroll if not at the top (y > 0)
if y > 0:
await page.evaluate(f"window.scrollTo(0, {y})")
# Request GC only before screenshot (not 3x per chunk)
await page.request_gc()
await page.evaluate(f"window.scrollTo(0, {y})")
await page.request_gc()
screenshot_chunks.append(await page.screenshot(
type="jpeg",
full_page=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
))
y += step_size
await page.request_gc()
# Restore original viewport size
await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
# If we have multiple chunks, stitch them together
if len(screenshot_chunks) > 1:
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
# Only use separate process for many chunks (4+) to avoid blocking the event loop
if len(screenshot_chunks) <= 3:
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
else:
# Use separate process for many chunks to avoid blocking
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
parent_conn, child_conn = Pipe()
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
screenshot = parent_conn.recv_bytes()
p.join()
# Explicit cleanup
del p
del parent_conn, child_conn
parent_conn, child_conn = Pipe()
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
screenshot = parent_conn.recv_bytes()
p.join()
logger.debug(
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
# Explicit cleanup
del screenshot_chunks
del p
del parent_conn, child_conn
screenshot_chunks = None
return screenshot
@@ -102,20 +89,6 @@ class fetcher(Fetcher):
proxy = None
# Capability flags
supports_browser_steps = True
supports_screenshots = True
supports_xpath_element_data = True
@classmethod
def get_status_icon_data(cls):
"""Return Chrome browser icon data for Playwright fetcher."""
return {
'filename': 'google-chrome-icon.png',
'alt': 'Using a Chrome browser',
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
@@ -166,7 +139,7 @@ class fetcher(Fetcher):
content = await self.page.content()
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
logger.debug(f"Saving step HTML to {destination}")
with open(destination, 'w', encoding='utf-8') as f:
with open(destination, 'w') as f:
f.write(content)
async def run(self,
@@ -180,7 +153,6 @@ class fetcher(Fetcher):
request_method=None,
timeout=None,
url=None,
watch_uuid=None,
):
from playwright.async_api import async_playwright
@@ -358,17 +330,4 @@ class fetcher(Fetcher):
browser = None
# Plugin registration for built-in fetcher
class PlaywrightFetcherPlugin:
"""Plugin class that registers the Playwright fetcher as a built-in plugin."""
def register_content_fetcher(self):
"""Register the Playwright fetcher"""
return ('html_webdriver', fetcher)
# Create module-level instance for plugin registration
playwright_plugin = PlaywrightFetcherPlugin()

View File

@@ -98,20 +98,6 @@ class fetcher(Fetcher):
proxy = None
# Capability flags
supports_browser_steps = True
supports_screenshots = True
supports_xpath_element_data = True
@classmethod
def get_status_icon_data(cls):
"""Return Chrome browser icon data for Puppeteer fetcher."""
return {
'filename': 'google-chrome-icon.png',
'alt': 'Using a Chrome browser',
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
@@ -169,7 +155,6 @@ class fetcher(Fetcher):
request_method,
timeout,
url,
watch_uuid
):
import re
self.delete_browser_steps_screenshots()
@@ -377,7 +362,6 @@ class fetcher(Fetcher):
request_method=None,
timeout=None,
url=None,
watch_uuid=None,
):
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
@@ -396,21 +380,7 @@ class fetcher(Fetcher):
request_method=request_method,
timeout=timeout,
url=url,
watch_uuid=watch_uuid,
), timeout=max_time
)
except asyncio.TimeoutError:
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
# Plugin registration for built-in fetcher
class PuppeteerFetcherPlugin:
"""Plugin class that registers the Puppeteer fetcher as a built-in plugin."""
def register_content_fetcher(self):
"""Register the Puppeteer fetcher"""
return ('html_webdriver', fetcher)
# Create module-level instance for plugin registration
puppeteer_plugin = PuppeteerFetcherPlugin()

View File

@@ -1,7 +1,6 @@
from loguru import logger
import hashlib
import os
import re
import asyncio
from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
@@ -26,9 +25,7 @@ class fetcher(Fetcher):
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False,
watch_uuid=None,
):
empty_pages_are_a_change=False):
"""Synchronous version of run - the original requests implementation"""
import chardet
@@ -79,22 +76,9 @@ class fetcher(Fetcher):
if not is_binary:
# Don't run this for PDF (and requests identified as binary) takes a _long_ time
if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
# For XML/RSS feeds, check the XML declaration for encoding attribute
# This is more reliable than chardet which can misdetect UTF-8 as MacRoman
content_type = r.headers.get('content-type', '').lower()
if 'xml' in content_type or 'rss' in content_type:
# Look for <?xml version="1.0" encoding="UTF-8"?>
xml_encoding_match = re.search(rb'<\?xml[^>]+encoding=["\']([^"\']+)["\']', r.content[:200])
if xml_encoding_match:
r.encoding = xml_encoding_match.group(1).decode('ascii')
else:
# Default to UTF-8 for XML if no encoding found
r.encoding = 'utf-8'
else:
# For other content types, use chardet
encoding = chardet.detect(r.content)['encoding']
if encoding:
r.encoding = encoding
encoding = chardet.detect(r.content)['encoding']
if encoding:
r.encoding = encoding
self.headers = r.headers
@@ -120,12 +104,6 @@ class fetcher(Fetcher):
self.raw_content = r.content
# If the content is an image, set it as screenshot for SSIM/visual comparison
content_type = r.headers.get('content-type', '').lower()
if 'image/' in content_type:
self.screenshot = r.content
logger.debug(f"Image content detected ({content_type}), set as screenshot for comparison")
async def run(self,
fetch_favicon=True,
current_include_filters=None,
@@ -137,7 +115,6 @@ class fetcher(Fetcher):
request_method=None,
timeout=None,
url=None,
watch_uuid=None,
):
"""Async wrapper that runs the synchronous requests code in a thread pool"""
@@ -155,8 +132,7 @@ class fetcher(Fetcher):
ignore_status_codes=ignore_status_codes,
current_include_filters=current_include_filters,
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change,
watch_uuid=watch_uuid,
empty_pages_are_a_change=empty_pages_are_a_change
)
)
@@ -173,15 +149,3 @@ class fetcher(Fetcher):
except Exception as e:
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")
# Plugin registration for built-in fetcher
class RequestsFetcherPlugin:
"""Plugin class that registers the requests fetcher as a built-in plugin."""
def register_content_fetcher(self):
"""Register the requests fetcher"""
return ('html_requests', fetcher)
# Create module-level instance for plugin registration
requests_plugin = RequestsFetcherPlugin()

View File

@@ -8,90 +8,14 @@ from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
# Cache font to avoid loading on every stitch
_cached_font = None
def _get_caption_font():
"""Get or create cached font for caption text."""
global _cached_font
if _cached_font is None:
from PIL import ImageFont
try:
_cached_font = ImageFont.truetype("arial.ttf", 35)
except IOError:
_cached_font = ImageFont.load_default()
return _cached_font
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
"""
Stitch image chunks together inline (no multiprocessing).
Optimized for small number of chunks (2-3) to avoid process creation overhead.
Args:
chunks_bytes: List of JPEG image bytes
original_page_height: Original page height in pixels
capture_height: Maximum capture height
Returns:
bytes: Stitched JPEG image
"""
import os
import io
from PIL import Image, ImageDraw
# Load images from byte chunks
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
total_height = sum(im.height for im in images)
max_width = max(im.width for im in images)
# Create stitched image
stitched = Image.new('RGB', (max_width, total_height))
y_offset = 0
for im in images:
stitched.paste(im, (0, y_offset))
y_offset += im.height
im.close() # Close immediately after pasting
# Draw caption only if page was trimmed
if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched)
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10
font = _get_caption_font()
bbox = draw.textbbox((0, 0), caption_text, font=font)
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw white background rectangle
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
# Draw text centered
text_x = (max_width - text_width) // 2
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
# Encode to JPEG
output = io.BytesIO()
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
result = output.getvalue()
# Cleanup
stitched.close()
return result
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
"""
Stitch image chunks together in a separate process.
Used for large number of chunks (4+) to avoid blocking the main event loop.
"""
import os
import io
from PIL import Image, ImageDraw, ImageFont
try:
# Load images from byte chunks
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
total_height = sum(im.height for im in images)
@@ -103,17 +27,21 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
for im in images:
stitched.paste(im, (0, y_offset))
y_offset += im.height
im.close() # Close immediately after pasting
# Draw caption only if page was trimmed
# Draw caption on top (overlaid, not extending canvas)
draw = ImageDraw.Draw(stitched)
if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched)
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10
font_size = 35
font_color = (255, 0, 0)
background_color = (255, 255, 255)
# Try to load font
# Try to load a proper font
try:
font = ImageFont.truetype("arial.ttf", 35)
font = ImageFont.truetype("arial.ttf", font_size)
except IOError:
font = ImageFont.load_default()
@@ -121,16 +49,19 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw white background rectangle
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
# Draw white rectangle background behind text
rect_top = 0
rect_bottom = text_height + 2 * padding
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
# Draw text centered
# Draw text centered horizontally, 10px padding from top of the rectangle
text_x = (max_width - text_width) // 2
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
text_y = padding
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
# Encode and send image with optimization
# Encode and send image
output = io.BytesIO()
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
pipe_conn.send_bytes(output.getvalue())
stitched.close()

View File

@@ -14,20 +14,6 @@ class fetcher(Fetcher):
proxy = None
proxy_url = None
# Capability flags
supports_browser_steps = True
supports_screenshots = True
supports_xpath_element_data = True
@classmethod
def get_status_icon_data(cls):
"""Return Chrome browser icon data for WebDriver fetcher."""
return {
'filename': 'google-chrome-icon.png',
'alt': 'Using a Chrome browser',
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
from urllib.parse import urlparse
@@ -71,7 +57,6 @@ class fetcher(Fetcher):
request_method=None,
timeout=None,
url=None,
watch_uuid=None,
):
import asyncio
@@ -156,16 +141,3 @@ class fetcher(Fetcher):
# Run the selenium operations in a thread pool to avoid blocking the event loop
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, _run_sync)
# Plugin registration for built-in fetcher
class WebDriverSeleniumFetcherPlugin:
"""Plugin class that registers the WebDriver Selenium fetcher as a built-in plugin."""
def register_content_fetcher(self):
"""Register the WebDriver Selenium fetcher"""
return ('html_webdriver', fetcher)
# Create module-level instance for plugin registration
webdriver_selenium_plugin = WebDriverSeleniumFetcherPlugin()

113
changedetectionio/diff.py Normal file
View File

@@ -0,0 +1,113 @@
import difflib
from typing import List, Iterator, Union
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
"""Return a slice of the list, or a single element if start == end."""
return lst[start:end] if start != end else [lst[start]]
def customSequenceMatcher(
before: List[str],
after: List[str],
include_equal: bool = False,
include_removed: bool = True,
include_added: bool = True,
include_replaced: bool = True,
include_change_type_prefix: bool = True,
html_colour: bool = False
) -> Iterator[List[str]]:
"""
Compare two sequences and yield differences based on specified parameters.
Args:
before (List[str]): Original sequence
after (List[str]): Modified sequence
include_equal (bool): Include unchanged parts
include_removed (bool): Include removed parts
include_added (bool): Include added parts
include_replaced (bool): Include replaced parts
include_change_type_prefix (bool): Add prefixes to indicate change types
html_colour (bool): Use HTML background colors for differences
Yields:
List[str]: Differences between sequences
"""
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if include_equal and tag == 'equal':
yield before[alo:ahi]
elif include_removed and tag == 'delete':
if html_colour:
yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)]
else:
yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi)
elif include_replaced and tag == 'replace':
if html_colour:
yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \
[f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
else:
yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \
[f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
elif include_added and tag == 'insert':
if html_colour:
yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
else:
yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi)
def render_diff(
previous_version_file_contents: str,
newest_version_file_contents: str,
include_equal: bool = False,
include_removed: bool = True,
include_added: bool = True,
include_replaced: bool = True,
line_feed_sep: str = "\n",
include_change_type_prefix: bool = True,
patch_format: bool = False,
html_colour: bool = False
) -> str:
"""
Render the difference between two file contents.
Args:
previous_version_file_contents (str): Original file contents
newest_version_file_contents (str): Modified file contents
include_equal (bool): Include unchanged parts
include_removed (bool): Include removed parts
include_added (bool): Include added parts
include_replaced (bool): Include replaced parts
line_feed_sep (str): Separator for lines in output
include_change_type_prefix (bool): Add prefixes to indicate change types
patch_format (bool): Use patch format for output
html_colour (bool): Use HTML background colors for differences
Returns:
str: Rendered difference
"""
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
if patch_format:
patch = difflib.unified_diff(previous_lines, newest_lines)
return line_feed_sep.join(patch)
rendered_diff = customSequenceMatcher(
before=previous_lines,
after=newest_lines,
include_equal=include_equal,
include_removed=include_removed,
include_added=include_added,
include_replaced=include_replaced,
include_change_type_prefix=include_change_type_prefix,
html_colour=html_colour
)
def flatten(lst: List[Union[str, List[str]]]) -> str:
return line_feed_sep.join(flatten(x) if isinstance(x, list) else x for x in lst)
return flatten(rendered_diff)

View File

@@ -1,479 +0,0 @@
"""
Diff rendering module for change detection.
This module provides functions for rendering differences between text content,
with support for various output formats and tokenization strategies.
"""
import difflib
from typing import List, Iterator, Union
from loguru import logger
import diff_match_patch as dmp_module
import re
import time
from .tokenizers import TOKENIZERS, tokenize_words_and_html
# Remember! gmail, outlook etc dont support <style> must be inline.
# Gmail: strips <ins> and <del> tags entirely.
# This is for the WHOLE line background style
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
HTML_REMOVED_STYLE = REMOVED_STYLE # Export alias for handler.py
HTML_ADDED_STYLE = ADDED_STYLE # Export alias for handler.py
# Darker backgrounds for nested highlighting (changed parts within lines)
REMOVED_INNER_STYLE = "background-color: #ff867a; color: #111;"
ADDED_INNER_STYLE = "background-color: #b2e841; color: #444;"
HTML_CHANGED_STYLE = REMOVED_STYLE
HTML_CHANGED_INTO_STYLE = ADDED_STYLE
# Placemarker constants - these get replaced by apply_service_tweaks() in handler.py
# Something that cant get escaped to HTML by accident
REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN'
REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED'
ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN'
ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED'
CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN'
CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED'
CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN'
CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
# Compiled regex patterns for performance
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
"""
Render word-level differences between two lines inline using diff-match-patch library.
Args:
before_line: Original line text
after_line: Modified line text
ignore_junk: Ignore whitespace-only changes
markdown_style: Unused (kept for backwards compatibility)
tokenizer: Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
Returns:
tuple[str, bool]: (diff output with inline word-level highlighting, has_changes flag)
"""
# Normalize whitespace if ignore_junk is enabled
if ignore_junk:
# Normalize whitespace: replace multiple spaces/tabs with single space
before_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', before_line)
after_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', after_line)
else:
before_normalized = before_line
after_normalized = after_line
# Use diff-match-patch with word-level tokenization
# Strategy: Use linesToChars to treat words as atomic units
dmp = dmp_module.diff_match_patch()
# Get the tokenizer function from the registry
tokenizer_func = TOKENIZERS.get(tokenizer, tokenize_words_and_html)
# Tokenize both lines using the selected tokenizer
before_tokens = tokenizer_func(before_normalized)
after_tokens = tokenizer_func(after_normalized or ' ')
# Create mappings for linesToChars (using it for word-mode)
# Join tokens with newline so each "line" is a token
before_text = '\n'.join(before_tokens)
after_text = '\n'.join(after_tokens)
# Use linesToChars for word-mode diffing
lines_result = dmp.diff_linesToChars(before_text, after_text)
line_before, line_after, line_array = lines_result
# Perform diff on the encoded strings
diffs = dmp.diff_main(line_before, line_after, False)
# Convert back to original text
dmp.diff_charsToLines(diffs, line_array)
# Remove the newlines we added for tokenization
diffs = [(op, text.replace('\n', '')) for op, text in diffs]
# DON'T apply semantic cleanup here - it would break token boundaries
# (e.g., "63" -> "66" would become "6" + "3" vs "6" + "6")
# We want to preserve the tokenizer's word boundaries
# Check if there are any changes
has_changes = any(op != 0 for op, _ in diffs)
if ignore_junk and not has_changes:
return after_line, False
# Check if the whole line is replaced (no unchanged content)
whole_line_replaced = not any(op == 0 and text.strip() for op, text in diffs)
# Build the output using placemarkers
# When whole line is replaced, wrap entire removed content once and entire added content once
if whole_line_replaced:
removed_tokens = []
added_tokens = []
for op, text in diffs:
if op == 0: # Equal (e.g., whitespace tokens in common positions)
# Include in both removed and added to preserve spacing
removed_tokens.append(text)
added_tokens.append(text)
elif op == -1: # Deletion
removed_tokens.append(text)
elif op == 1: # Insertion
added_tokens.append(text)
# Join all tokens and wrap the entire string once for removed, once for added
result_parts = []
if removed_tokens:
removed_full = ''.join(removed_tokens).rstrip()
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
if added_tokens:
if result_parts: # Add newline between removed and added
result_parts.append('\n')
added_full = ''.join(added_tokens).rstrip()
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
return ''.join(result_parts), has_changes
else:
# Inline changes within the line
result_parts = []
for op, text in diffs:
if op == 0: # Equal
result_parts.append(text)
elif op == 1: # Insertion
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
else:
result_parts.append(trailing)
elif op == -1: # Deletion
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
else:
result_parts.append(trailing)
return ''.join(result_parts), has_changes
def render_nested_line_diff(before_line: str, after_line: str, ignore_junk: bool = False, tokenizer: str = 'words_and_html') -> tuple[str, str, bool]:
"""
Render line-level differences with nested highlighting for changed parts.
Returns two separate lines:
- Before line: light red background with dark red on removed parts
- After line: light green background with dark green on added parts
Args:
before_line: Original line text
after_line: Modified line text
ignore_junk: Ignore whitespace-only changes
tokenizer: Name of tokenizer to use from TOKENIZERS registry
Returns:
tuple[str, str, bool]: (before_with_highlights, after_with_highlights, has_changes)
"""
# Normalize whitespace if ignore_junk is enabled
if ignore_junk:
before_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', before_line)
after_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', after_line)
else:
before_normalized = before_line
after_normalized = after_line
# Use diff-match-patch with word-level tokenization
dmp = dmp_module.diff_match_patch()
# Get the tokenizer function from the registry
tokenizer_func = TOKENIZERS.get(tokenizer, tokenize_words_and_html)
# Tokenize both lines
before_tokens = tokenizer_func(before_normalized)
after_tokens = tokenizer_func(after_normalized or ' ')
# Create mappings for linesToChars
before_text = '\n'.join(before_tokens)
after_text = '\n'.join(after_tokens)
# Use linesToChars for word-mode diffing
lines_result = dmp.diff_linesToChars(before_text, after_text)
line_before, line_after, line_array = lines_result
# Perform diff on the encoded strings
diffs = dmp.diff_main(line_before, line_after, False)
# Convert back to original text
dmp.diff_charsToLines(diffs, line_array)
# Remove the newlines we added for tokenization
diffs = [(op, text.replace('\n', '')) for op, text in diffs]
# DON'T apply semantic cleanup here - it would break token boundaries
# (e.g., "63" -> "66" would become "6" + "3" vs "6" + "6")
# We want to preserve the tokenizer's word boundaries
# Check if there are any changes
has_changes = any(op != 0 for op, _ in diffs)
if ignore_junk and not has_changes:
return before_line, after_line, False
# Build the before line (with nested highlighting for removed parts)
before_parts = []
for op, text in diffs:
if op == 0: # Equal
before_parts.append(text)
elif op == -1: # Deletion (in before)
before_parts.append(f'<span style="{REMOVED_INNER_STYLE}">{text}</span>')
# Skip insertions (op == 1) for the before line
before_content = ''.join(before_parts)
# Build the after line (with nested highlighting for added parts)
after_parts = []
for op, text in diffs:
if op == 0: # Equal
after_parts.append(text)
elif op == 1: # Insertion (in after)
after_parts.append(f'<span style="{ADDED_INNER_STYLE}">{text}</span>')
# Skip deletions (op == -1) for the after line
after_content = ''.join(after_parts)
# Wrap content with placemarkers (inner HTML highlighting is preserved)
before_html = f'{CHANGED_PLACEMARKER_OPEN}{before_content}{CHANGED_PLACEMARKER_CLOSED}'
after_html = f'{CHANGED_INTO_PLACEMARKER_OPEN}{after_content}{CHANGED_INTO_PLACEMARKER_CLOSED}'
return before_html, after_html, has_changes
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
"""Return a slice of the list, or a single element if start == end."""
return lst[start:end] if start != end else [lst[start]]
def customSequenceMatcher(
before: List[str],
after: List[str],
include_equal: bool = False,
include_removed: bool = True,
include_added: bool = True,
include_replaced: bool = True,
include_change_type_prefix: bool = True,
word_diff: bool = False,
context_lines: int = 0,
case_insensitive: bool = False,
ignore_junk: bool = False,
tokenizer: str = 'words_and_html'
) -> Iterator[List[str]]:
"""
Compare two sequences and yield differences based on specified parameters.
Args:
before (List[str]): Original sequence
after (List[str]): Modified sequence
include_equal (bool): Include unchanged parts
include_removed (bool): Include removed parts
include_added (bool): Include added parts
include_replaced (bool): Include replaced parts
include_change_type_prefix (bool): Add prefixes to indicate change types
word_diff (bool): Use word-level diffing for replaced lines (controls inline rendering)
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
case_insensitive (bool): Perform case-insensitive comparison
ignore_junk (bool): Ignore whitespace-only changes
tokenizer (str): Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
Yields:
List[str]: Differences between sequences
"""
# Prepare sequences for comparison (lowercase if case-insensitive, normalize whitespace if ignore_junk)
def prepare_line(line):
if case_insensitive:
line = line.lower()
if ignore_junk:
# Normalize whitespace: replace multiple spaces/tabs with single space
line = WHITESPACE_NORMALIZE_RE.sub(' ', line)
return line
compare_before = [prepare_line(line) for line in before]
compare_after = [prepare_line(line) for line in after]
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=compare_before, b=compare_after)
# When context_lines is set and include_equal is False, we need to track which equal lines to include
if context_lines > 0 and not include_equal:
opcodes = list(cruncher.get_opcodes())
# Mark equal ranges that should be included based on context
included_equal_ranges = set()
for i, (tag, alo, ahi, blo, bhi) in enumerate(opcodes):
if tag != 'equal':
# Include context lines before this change
for j in range(max(0, i - 1), i):
if opcodes[j][0] == 'equal':
prev_alo, prev_ahi = opcodes[j][1], opcodes[j][2]
# Include last N lines of the previous equal block
context_start = max(prev_alo, prev_ahi - context_lines)
for line_num in range(context_start, prev_ahi):
included_equal_ranges.add(line_num)
# Include context lines after this change
for j in range(i + 1, min(len(opcodes), i + 2)):
if opcodes[j][0] == 'equal':
next_alo, next_ahi = opcodes[j][1], opcodes[j][2]
# Include first N lines of the next equal block
context_end = min(next_ahi, next_alo + context_lines)
for line_num in range(next_alo, context_end):
included_equal_ranges.add(line_num)
# Remember! gmail, outlook etc dont support <style> must be inline.
# Gmail: strips <ins> and <del> tags entirely.
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if tag == 'equal':
if include_equal:
yield before[alo:ahi]
elif context_lines > 0:
# Only include equal lines that are in the context range
context_lines_to_include = [before[i] for i in range(alo, ahi) if i in included_equal_ranges]
if context_lines_to_include:
yield context_lines_to_include
elif include_removed and tag == 'delete':
if include_change_type_prefix:
yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)]
else:
yield same_slicer(before, alo, ahi)
elif include_replaced and tag == 'replace':
before_lines = same_slicer(before, alo, ahi)
after_lines = same_slicer(after, blo, bhi)
# Use inline word-level diff for single line replacements when word_diff is enabled
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
if ignore_junk and not has_changes:
# No real changes, skip this line
continue
yield [inline_diff]
else:
# Fall back to line-level diff for multi-line changes
if include_change_type_prefix:
yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in before_lines] + \
[f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in after_lines]
else:
yield before_lines + after_lines
elif include_added and tag == 'insert':
if include_change_type_prefix:
yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
else:
yield same_slicer(after, blo, bhi)
def render_diff(
previous_version_file_contents: str,
newest_version_file_contents: str,
include_equal: bool = False,
include_removed: bool = True,
include_added: bool = True,
include_replaced: bool = True,
include_change_type_prefix: bool = True,
patch_format: bool = False,
word_diff: bool = True,
context_lines: int = 0,
case_insensitive: bool = False,
ignore_junk: bool = False,
tokenizer: str = 'words_and_html'
) -> str:
"""
Render the difference between two file contents.
Args:
previous_version_file_contents (str): Original file contents
newest_version_file_contents (str): Modified file contents
include_equal (bool): Include unchanged parts
include_removed (bool): Include removed parts
include_added (bool): Include added parts
include_replaced (bool): Include replaced parts
include_change_type_prefix (bool): Add prefixes to indicate change types
patch_format (bool): Use patch format for output
word_diff (bool): Use word-level diffing for replaced lines (controls inline rendering)
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
case_insensitive (bool): Perform case-insensitive comparison, By default the test_json_diff/process.py is case sensitive, so this follows same logic
ignore_junk (bool): Ignore whitespace-only changes
tokenizer (str): Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
Returns:
str: Rendered difference
"""
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
now = time.time()
logger.debug(
f"diff options: "
f"include_equal={include_equal}, "
f"include_removed={include_removed}, "
f"include_added={include_added}, "
f"include_replaced={include_replaced}, "
f"include_change_type_prefix={include_change_type_prefix}, "
f"patch_format={patch_format}, "
f"word_diff={word_diff}, "
f"context_lines={context_lines}, "
f"case_insensitive={case_insensitive}, "
f"ignore_junk={ignore_junk}, "
f"tokenizer={tokenizer}"
)
if patch_format:
patch = difflib.unified_diff(previous_lines, newest_lines)
return "\n".join(patch)
rendered_diff = customSequenceMatcher(
before=previous_lines,
after=newest_lines,
include_equal=include_equal,
include_removed=include_removed,
include_added=include_added,
include_replaced=include_replaced,
include_change_type_prefix=include_change_type_prefix,
word_diff=word_diff,
context_lines=context_lines,
case_insensitive=case_insensitive,
ignore_junk=ignore_junk,
tokenizer=tokenizer
)
def flatten(lst: List[Union[str, List[str]]]) -> str:
result = []
for x in lst:
if isinstance(x, list):
result.extend(x)
else:
result.append(x)
return "\n".join(result)
logger.debug(f"Diff generated in {time.time() - now:.2f}s")
return flatten(rendered_diff)
# Export main public API
__all__ = [
'render_diff',
'customSequenceMatcher',
'render_inline_word_diff',
'render_nested_line_diff',
'TOKENIZERS',
'REMOVED_STYLE',
'ADDED_STYLE',
'REMOVED_INNER_STYLE',
'ADDED_INNER_STYLE',
]

View File

@@ -1,23 +0,0 @@
"""
Tokenizers for diff operations.
This module provides various tokenization strategies for use with the diff system.
New tokenizers can be easily added by:
1. Creating a new module in this directory
2. Importing and registering it in the TOKENIZERS dictionary below
"""
from .natural_text import tokenize_words
from .words_and_html import tokenize_words_and_html
# Tokenizer registry - maps tokenizer names to functions
TOKENIZERS = {
'words': tokenize_words,
'words_and_html': tokenize_words_and_html,
}
__all__ = [
'tokenize_words',
'tokenize_words_and_html',
'TOKENIZERS',
]

View File

@@ -1,44 +0,0 @@
"""
Simple word tokenizer using whitespace boundaries.
This is a simpler tokenizer that treats all whitespace as token boundaries
without special handling for HTML tags or other markup.
"""
from typing import List
def tokenize_words(text: str) -> List[str]:
"""
Split text into words using simple whitespace boundaries.
This is a simpler tokenizer that treats all whitespace as token boundaries
without special handling for HTML tags.
Args:
text: Input text to tokenize
Returns:
List of tokens (words and whitespace)
Examples:
>>> tokenize_words("Hello world")
['Hello', ' ', 'world']
>>> tokenize_words("one two")
['one', ' ', ' ', 'two']
"""
tokens = []
current = ''
for char in text:
if char.isspace():
if current:
tokens.append(current)
current = ''
tokens.append(char)
else:
current += char
if current:
tokens.append(current)
return tokens

View File

@@ -1,61 +0,0 @@
"""
Tokenizer that preserves HTML tags as atomic units while splitting on whitespace.
This tokenizer is specifically designed for HTML content where:
- HTML tags should remain intact (e.g., '<p>', '<a href="...">')
- Whitespace tokens are preserved for accurate diff reconstruction
- Words are split on whitespace boundaries
"""
from typing import List
def tokenize_words_and_html(text: str) -> List[str]:
"""
Split text into words and boundaries (spaces, HTML tags).
This tokenizer preserves HTML tags as atomic units while splitting on whitespace.
Useful for content that contains HTML markup.
Args:
text: Input text to tokenize
Returns:
List of tokens (words, spaces, HTML tags)
Examples:
>>> tokenize_words_and_html("<p>Hello world</p>")
['<p>', 'Hello', ' ', 'world', '</p>']
>>> tokenize_words_and_html("<a href='test.com'>link</a>")
['<a href=\\'test.com\\'>', 'link', '</a>']
"""
tokens = []
current = ''
in_tag = False
for char in text:
if char == '<':
# Start of HTML tag
if current:
tokens.append(current)
current = ''
current = '<'
in_tag = True
elif char == '>' and in_tag:
# End of HTML tag
current += '>'
tokens.append(current)
current = ''
in_tag = False
elif char.isspace() and not in_tag:
# Space outside of tag
if current:
tokens.append(current)
current = ''
tokens.append(char)
else:
current += char
if current:
tokens.append(current)
return tokens

View File

@@ -38,7 +38,7 @@ from loguru import logger
from changedetectionio import __version__
from changedetectionio import queuedWatchMetaData
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
from changedetectionio.api.Search import Search
from .time_handler import is_within_schedule
@@ -81,27 +81,6 @@ if os.getenv('FLASK_SERVER_NAME'):
# Disables caching of the templates
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
# Configure Jinja2 to search for templates in plugin directories
def _configure_plugin_templates():
"""Configure Jinja2 loader to include plugin template directories."""
from jinja2 import ChoiceLoader, FileSystemLoader
from changedetectionio.pluggy_interface import get_plugin_template_paths
# Get plugin template paths
plugin_template_paths = get_plugin_template_paths()
if plugin_template_paths:
# Create a ChoiceLoader that searches app templates first, then plugin templates
loaders = [app.jinja_loader] # Keep the default app loader first
for path in plugin_template_paths:
loaders.append(FileSystemLoader(path))
app.jinja_loader = ChoiceLoader(loaders)
logger.info(f"Configured Jinja2 to search {len(plugin_template_paths)} plugin template directories")
# Configure plugin templates (called after plugins are loaded)
_configure_plugin_templates()
csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]
@@ -122,12 +101,12 @@ def init_app_secret(datastore_path):
path = os.path.join(datastore_path, "secret.txt")
try:
with open(path, "r", encoding='utf-8') as f:
with open(path, "r") as f:
secret = f.read()
except FileNotFoundError:
import secrets
with open(path, "w", encoding='utf-8') as f:
with open(path, "w") as f:
secret = secrets.token_hex(32)
f.write(secret)
@@ -154,11 +133,6 @@ def get_socketio_path():
# Socket.IO will be available at {prefix}/socket.io/
return prefix
@app.template_global('is_safe_valid_url')
def _is_safe_valid_url(test_url):
from .validate_url import is_safe_valid_url
return is_safe_valid_url(test_url)
@app.template_filter('format_number_locale')
def _jinja2_filter_format_number_locale(value: float) -> str:
@@ -231,55 +205,6 @@ def _jinja2_filter_seconds_precise(timestamp):
return format(int(time.time()-timestamp), ',d')
@app.template_filter('fetcher_status_icons')
def _jinja2_filter_fetcher_status_icons(fetcher_name):
"""Get status icon HTML for a given fetcher.
This filter checks both built-in fetchers and plugin fetchers for status icons.
Args:
fetcher_name: The fetcher name (e.g., 'html_webdriver', 'html_js_zyte')
Returns:
str: HTML string containing status icon elements
"""
from changedetectionio import content_fetchers
from changedetectionio.pluggy_interface import collect_fetcher_status_icons
from markupsafe import Markup
from flask import url_for
icon_data = None
# First check if it's a plugin fetcher (plugins have priority)
plugin_icon_data = collect_fetcher_status_icons(fetcher_name)
if plugin_icon_data:
icon_data = plugin_icon_data
# Check if it's a built-in fetcher
elif hasattr(content_fetchers, fetcher_name):
fetcher_class = getattr(content_fetchers, fetcher_name)
if hasattr(fetcher_class, 'get_status_icon_data'):
icon_data = fetcher_class.get_status_icon_data()
# Build HTML from icon data
if icon_data and isinstance(icon_data, dict):
# Use 'group' from icon_data if specified, otherwise default to 'images'
group = icon_data.get('group', 'images')
# Try to use url_for, but fall back to manual URL building if endpoint not registered yet
try:
icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
except:
# Fallback: build URL manually respecting APPLICATION_ROOT
from flask import request
app_root = request.script_root if hasattr(request, 'script_root') else ''
icon_url = f"{app_root}/static/{group}/{icon_data['filename']}"
style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
html = f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>'
return Markup(html)
return ''
# Import login_optionally_required from auth_decorator
from changedetectionio.auth_decorator import login_optionally_required
@@ -377,9 +302,6 @@ def changedetection_app(config=None, datastore_o=None):
return login_manager.unauthorized()
watch_api.add_resource(WatchHistoryDiff,
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
resource_class_kwargs={'datastore': datastore})
watch_api.add_resource(WatchSingleHistory,
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
@@ -460,7 +382,7 @@ def changedetection_app(config=None, datastore_o=None):
# We would sometimes get login loop errors on sites hosted in sub-paths
# note for the future:
# if not is_safe_valid_url(next):
# if not is_safe_url(next):
# return flask.abort(400)
return redirect(url_for('watchlist.index'))
@@ -561,31 +483,6 @@ def changedetection_app(config=None, datastore_o=None):
except FileNotFoundError:
abort(404)
# Handle plugin group specially
if group == 'plugin':
# Serve files from plugin static directories
from changedetectionio.pluggy_interface import plugin_manager
import os as os_check
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
if hasattr(plugin_obj, 'plugin_static_path'):
try:
static_path = plugin_obj.plugin_static_path()
if static_path and os_check.path.isdir(static_path):
# Check if file exists in plugin's static directory
plugin_file_path = os_check.path.join(static_path, filename)
if os_check.path.isfile(plugin_file_path):
# Found the file in a plugin
response = make_response(send_from_directory(static_path, filename))
response.headers['Cache-Control'] = 'max-age=3600, public' # Cache for 1 hour
return response
except Exception as e:
logger.debug(f"Error checking plugin {plugin_name} for static file: {e}")
pass
# File not found in any plugin
abort(404)
# These files should be in our subdirectory
try:
return send_from_directory(f"static/{group}", path=filename)
@@ -892,19 +789,15 @@ def ticker_thread_check_time_launch_checks():
# @todo - Maybe make this a hook?
# Time schedule limit - Decide between watch or global settings
scheduler_source = None
if watch.get('time_between_check_use_default'):
time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {})
scheduler_source = 'system/global settings'
logger.trace(f"{uuid} Time scheduler - Using system/global settings")
else:
time_schedule_limit = watch.get('time_schedule_limit')
scheduler_source = 'watch'
logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
if time_schedule_limit and time_schedule_limit.get('enabled'):
logger.trace(f"{uuid} Time scheduler - Using scheduler settings from {scheduler_source}")
try:
result = is_within_schedule(time_schedule_limit=time_schedule_limit,
default_tz=tz_name
@@ -916,7 +809,6 @@ def ticker_thread_check_time_launch_checks():
logger.error(
f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}")
return False
# If they supplied an individual entry minutes to threshold.
threshold = recheck_time_system_seconds if watch.get('time_between_check_use_default') else watch.threshold_seconds()

View File

@@ -3,7 +3,7 @@ import re
from loguru import logger
from wtforms.widgets.core import TimeInput
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
from changedetectionio.conditions.form import ConditionFormRow
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.strtobool import strtobool
@@ -28,8 +28,11 @@ from wtforms.utils import unset_value
from wtforms.validators import ValidationError
from validators.url import url as url_validator
from changedetectionio.widgets import TernaryNoneBooleanField
# default
# each select <option data-enabled="enabled-0-0"
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
@@ -503,9 +506,7 @@ class ValidateJinja2Template(object):
jinja2_env = create_jinja_env(loader=BaseLoader)
# Add notification tokens for validation
static_token_placeholders = NotificationContextData()
static_token_placeholders.set_random_for_validation()
jinja2_env.globals.update(static_token_placeholders)
jinja2_env.globals.update(NotificationContextData())
if hasattr(field, 'extra_notification_tokens'):
jinja2_env.globals.update(field.extra_notification_tokens)
@@ -540,10 +541,19 @@ class validateURL(object):
def validate_url(test_url):
from changedetectionio.validate_url import is_safe_valid_url
if not is_safe_valid_url(test_url):
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
try:
url_validator(test_url, simple_host=allow_simplehost)
except validators.ValidationError:
#@todo check for xss
message = f"'{test_url}' is not a valid URL."
# This should be wtforms.validators.
raise ValidationError('Watch protocol is not permitted or invalid URL format')
raise ValidationError(message)
from .model.Watch import is_safe_url
if not is_safe_url(test_url):
# This should be wtforms.validators.
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')
class ValidateSinglePythonRegexString(object):
@@ -731,6 +741,7 @@ class quickWatchForm(Form):
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
# Common to a single watch and the global settings
class commonSettingsForm(Form):
from . import processors
@@ -743,21 +754,13 @@ class commonSettingsForm(Form):
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
notification_format = SelectField('Notification format', choices=list(valid_notification_formats.items()))
notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])
# Not true anymore but keep the validate_ hook for future use, we convert color tags
# def validate_notification_urls(self, field):
# """Validate that HTML Color format is not used with Telegram"""
# if self.notification_format.data == 'HTML Color' and field.data:
# for url in field.data:
# if url and ('tgram://' in url or 'discord://' in url or 'discord.com/api/webhooks' in url):
# raise ValidationError('HTML Color format is not supported by Telegram and Discord. Please choose another Notification Format (Plain Text, HTML, or Markdown to HTML).')
class importForm(Form):
from . import processors
@@ -994,25 +997,13 @@ class globalSettingsApplicationForm(commonSettingsForm):
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
ignore_whitespace = BooleanField('Ignore whitespace')
ssim_threshold = SelectField(
'Default Screenshot Comparison Sensitivity',
choices=[
('0.75', 'Low sensitivity (only major changes)'),
('0.85', 'Medium sensitivity (moderate changes)'),
('0.96', 'High sensitivity (small changes)'),
('0.999', 'Very high sensitivity (any change)')
],
default='0.96'
)
password = SaltyPasswordField()
pager_size = IntegerField('Pager size',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,
message="Should be atleast zero (disabled)")])
rss_content_format = SelectField('RSS Content format', choices=list(RSS_FORMAT_TYPES.items()))
rss_template_type = SelectField('RSS <description> body built from', choices=list(RSS_TEMPLATE_TYPE_OPTIONS.items()))
rss_template_override = TextAreaField('RSS "System default" template override', render_kw={"rows": "5", "placeholder": RSS_TEMPLATE_HTML_DEFAULT}, validators=[validators.Optional(), ValidateJinja2Template()])
rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES)
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
@@ -1021,10 +1012,8 @@ class globalSettingsApplicationForm(commonSettingsForm):
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
validators=[validators.Optional()])
rss_reader_mode = BooleanField('Enable RSS reader mode ', default=False, validators=[validators.Optional()])
rss_diff_length = IntegerField(label='Number of changes to show in watch RSS feed',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, message="Should contain zero or more attempts")])
rss_reader_mode = BooleanField('RSS reader mode ', default=False,
validators=[validators.Optional()])
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"},

View File

@@ -1,5 +1,3 @@
from functools import lru_cache
from loguru import logger
from typing import List
import html
@@ -15,6 +13,7 @@ TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
# 'price' , 'lowPrice', 'highPrice' are usually under here
# All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -23,9 +22,9 @@ class JSONNotFound(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
# Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "(?i)foobar" type configuration
@lru_cache(maxsize=100)
def perl_style_slash_enclosed_regex_to_options(regex):
res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE)
@@ -172,131 +171,75 @@ def elementpath_tostring(obj):
return str(obj)
# Return str Utf-8 of matched rules
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_xml=False):
"""
:param xpath_filter:
:param html_content:
:param append_pretty_line_formatting:
:param is_xml: set to true if is XML or is RSS (RSS is XML)
:return:
"""
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
from lxml import etree, html
import elementpath
# xpath 2.0-3.1
from elementpath.xpath3 import XPath3Parser
parser = etree.HTMLParser()
tree = None
try:
if is_xml:
# So that we can keep CDATA for cdata_in_document_to_text() to process
parser = etree.XMLParser(strip_cdata=False)
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
if is_rss:
# So that we can keep CDATA for cdata_in_document_to_text() to process
parser = etree.XMLParser(strip_cdata=False)
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
html_block = ""
r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
#@note: //title/text() wont work where <title>CDATA..
if type(r) != list:
r = [r]
for element in r:
# When there's more than 1 match, then add the suffix to separate each line
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
# (This way each 'match' reliably has a new-line in the diff)
# Divs are converted to 4 whitespaces by inscriptis
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
if type(element) == str:
html_block += element
elif issubclass(type(element), etree._Element) or issubclass(type(element), etree._ElementTree):
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
else:
tree = html.fromstring(html_content, parser=parser)
html_block = ""
html_block += elementpath_tostring(element)
# Build namespace map for XPath queries
namespaces = {'re': 'http://exslt.org/regular-expressions'}
# Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML)
# XPath spec: unprefixed element names have no namespace, not the default namespace
# Solution: Register the default namespace with empty string prefix in elementpath
# This is primarily for RSS/Atom feeds but works for any XML with default namespace
if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap:
# Register the default namespace with empty string prefix for elementpath
# This allows //title to match elements in the default namespace
namespaces[''] = tree.nsmap[None]
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
if type(r) != list:
r = [r]
for element in r:
# When there's more than 1 match, then add the suffix to separate each line
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
# (This way each 'match' reliably has a new-line in the diff)
# Divs are converted to 4 whitespaces by inscriptis
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
if type(element) == str:
html_block += element
elif issubclass(type(element), etree._Element) or issubclass(type(element), etree._ElementTree):
# Use 'xml' method for RSS/XML content, 'html' for HTML content
# parser will be XMLParser if we detected XML content
method = 'xml' if (is_xml or isinstance(parser, etree.XMLParser)) else 'html'
html_block += etree.tostring(element, pretty_print=True, method=method, encoding='unicode')
else:
html_block += elementpath_tostring(element)
return html_block
finally:
# Explicitly clear the tree to free memory
# lxml trees can hold significant memory, especially with large documents
if tree is not None:
tree.clear()
return html_block
# Return str Utf-8 of matched rules
# 'xpath1:'
def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_xml=False):
def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
from lxml import etree, html
parser = None
tree = None
try:
if is_xml:
# So that we can keep CDATA for cdata_in_document_to_text() to process
parser = etree.XMLParser(strip_cdata=False)
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
if is_rss:
# So that we can keep CDATA for cdata_in_document_to_text() to process
parser = etree.XMLParser(strip_cdata=False)
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
html_block = ""
r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
#@note: //title/text() wont work where <title>CDATA..
for element in r:
# When there's more than 1 match, then add the suffix to separate each line
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
# (This way each 'match' reliably has a new-line in the diff)
# Divs are converted to 4 whitespaces by inscriptis
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
# Some kind of text, UTF-8 or other
if isinstance(element, (str, bytes)):
html_block += element
else:
tree = html.fromstring(html_content, parser=parser)
html_block = ""
# Return the HTML which will get parsed as text
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
# Build namespace map for XPath queries
namespaces = {'re': 'http://exslt.org/regular-expressions'}
# NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace
# For documents with default namespace (RSS/Atom feeds), users must use:
# - local-name(): //*[local-name()='title']/text()
# - Or use xpath_filter (not xpath1_filter) which supports default namespaces
# XPath spec: unprefixed element names have no namespace, not the default namespace
r = tree.xpath(xpath_filter.strip(), namespaces=namespaces)
#@note: xpath1 (lxml) does NOT automatically handle default namespaces
#@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
for element in r:
# When there's more than 1 match, then add the suffix to separate each line
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
# (This way each 'match' reliably has a new-line in the diff)
# Divs are converted to 4 whitespaces by inscriptis
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
# Some kind of text, UTF-8 or other
if isinstance(element, (str, bytes)):
html_block += element
else:
# Return the HTML/XML which will get parsed as text
# Use 'xml' method for RSS/XML content, 'html' for HTML content
# parser will be XMLParser if we detected XML content
method = 'xml' if (is_xml or isinstance(parser, etree.XMLParser)) else 'html'
html_block += etree.tostring(element, pretty_print=True, method=method, encoding='unicode')
return html_block
finally:
# Explicitly clear the tree to free memory
# lxml trees can hold significant memory, especially with large documents
if tree is not None:
tree.clear()
return html_block
# Extract/find element
def extract_element(find='title', html_content=''):
@@ -464,13 +407,7 @@ def strip_ignore_text(content, wordlist, mode="content"):
ignore_regex_multiline = []
ignored_lines = []
if not content:
return ''
for k in wordlist:
# Skip empty strings to avoid matching everything
if not k or not k.strip():
continue
# Is it a regex?
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
if res:

View File

@@ -9,7 +9,6 @@ from .safe_jinja import (
JINJA2_MAX_RETURN_PAYLOAD_SIZE,
DEFAULT_JINJA2_EXTENSIONS,
)
from .plugins.regex import regex_replace
__all__ = [
'TimeExtension',
@@ -18,5 +17,4 @@ __all__ = [
'create_jinja_env',
'JINJA2_MAX_RETURN_PAYLOAD_SIZE',
'DEFAULT_JINJA2_EXTENSIONS',
'regex_replace',
]

View File

@@ -1,6 +0,0 @@
"""
Jinja2 custom filter plugins for changedetection.io
"""
from .regex import regex_replace
__all__ = ['regex_replace']

View File

@@ -1,98 +0,0 @@
"""
Regex filter plugin for Jinja2 templates.
Provides regex_replace filter for pattern-based string replacements in templates.
"""
import re
import signal
from loguru import logger
def regex_replace(value: str, pattern: str, replacement: str = '', count: int = 0) -> str:
"""
Replace occurrences of a regex pattern in a string.
Security: Protected against ReDoS (Regular Expression Denial of Service) attacks:
- Limits input value size to prevent excessive processing
- Uses timeout mechanism to prevent runaway regex operations
- Validates pattern complexity to prevent catastrophic backtracking
Args:
value: The input string to perform replacements on
pattern: The regex pattern to search for
replacement: The replacement string (default: '')
count: Maximum number of replacements (0 = replace all, default: 0)
Returns:
String with replacements applied, or original value on error
Example:
{{ "hello world" | regex_replace("world", "universe") }}
{{ diff | regex_replace("<td>([^<]+)</td><td>([^<]+)</td>", "Label1: \\1\\nLabel2: \\2") }}
Security limits:
- Maximum input size: 10MB
- Maximum pattern length: 500 characters
- Operation timeout: 10 seconds
- Dangerous nested quantifier patterns are rejected
"""
# Security limits
MAX_INPUT_SIZE = 1024 * 1024 * 10 # 10MB max input size
MAX_PATTERN_LENGTH = 500 # Maximum regex pattern length
REGEX_TIMEOUT_SECONDS = 10 # Maximum time for regex operation
# Validate input sizes
value_str = str(value)
if len(value_str) > MAX_INPUT_SIZE:
logger.warning(f"regex_replace: Input too large ({len(value_str)} bytes), truncating")
value_str = value_str[:MAX_INPUT_SIZE]
if len(pattern) > MAX_PATTERN_LENGTH:
logger.warning(f"regex_replace: Pattern too long ({len(pattern)} chars), rejecting")
return value_str
# Check for potentially dangerous patterns (basic checks)
# Nested quantifiers like (a+)+ can cause catastrophic backtracking
dangerous_patterns = [
r'\([^)]*\+[^)]*\)\+', # (x+)+
r'\([^)]*\*[^)]*\)\+', # (x*)+
r'\([^)]*\+[^)]*\)\*', # (x+)*
r'\([^)]*\*[^)]*\)\*', # (x*)*
]
for dangerous in dangerous_patterns:
if re.search(dangerous, pattern):
logger.warning(f"regex_replace: Potentially dangerous pattern detected: {pattern}")
return value_str
def timeout_handler(signum, frame):
raise TimeoutError("Regex operation timed out")
try:
# Set up timeout for regex operation (Unix-like systems only)
# This prevents ReDoS attacks
old_handler = None
if hasattr(signal, 'SIGALRM'):
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(REGEX_TIMEOUT_SECONDS)
try:
result = re.sub(pattern, replacement, value_str, count=count)
finally:
# Cancel the alarm
if hasattr(signal, 'SIGALRM'):
signal.alarm(0)
if old_handler is not None:
signal.signal(signal.SIGALRM, old_handler)
return result
except TimeoutError:
logger.error(f"regex_replace: Regex operation timed out - possible ReDoS attack. Pattern: {pattern}")
return value_str
except re.error as e:
logger.warning(f"regex_replace: Invalid regex pattern: {e}")
return value_str
except Exception as e:
logger.error(f"regex_replace: Unexpected error: {e}")
return value_str

View File

@@ -8,13 +8,13 @@ import jinja2.sandbox
import typing as t
import os
from .extensions.TimeExtension import TimeExtension
from .plugins import regex_replace
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
# Default extensions - can be overridden in create_jinja_env()
DEFAULT_JINJA2_EXTENSIONS = [TimeExtension]
def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
"""
Create a sandboxed Jinja2 environment with our custom extensions and default timezone.
@@ -38,9 +38,6 @@ def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandb
default_timezone = os.getenv('TZ', 'UTC').strip()
jinja2_env.default_timezone = default_timezone
# Register custom filters
jinja2_env.filters['regex_replace'] = regex_replace
return jinja2_env

View File

@@ -1,7 +1,6 @@
from os import getenv
from copy import deepcopy
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
from changedetectionio.notification import (
default_notification_body,
@@ -46,7 +45,6 @@ class model(dict):
'global_subtractive_selectors': [],
'ignore_whitespace': True,
'ignore_status_codes': False, #@todo implement, as ternary.
'ssim_threshold': '0.96', # Default SSIM threshold for screenshot comparison
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'notification_title': default_notification_title,
@@ -55,10 +53,7 @@ class model(dict):
'password': False,
'render_anchor_tag_content': False,
'rss_access_token': None,
'rss_content_format': RSS_CONTENT_FORMAT_DEFAULT,
'rss_template_type': 'system_default',
'rss_template_override': None,
'rss_diff_length': 5,
'rss_content_format': RSS_FORMAT_TYPES[0][0],
'rss_hide_muted_watches': True,
'rss_reader_mode': False,
'scheduler_timezone_default': None, # Default IANA timezone name
@@ -79,13 +74,12 @@ class model(dict):
def __init__(self, *arg, **kw):
super(model, self).__init__(*arg, **kw)
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
self.update(deepcopy(self.base_config))
self.update(self.base_config)
def parse_headers_from_text_file(filepath):
headers = {}
with open(filepath, 'r', encoding='utf-8') as f:
with open(filepath, 'r') as f:
for l in f.readlines():
l = l.strip()
if not l.startswith('#') and ':' in l:

View File

@@ -1,5 +1,4 @@
from blinker import signal
from changedetectionio.validate_url import is_safe_valid_url
from changedetectionio.strtobool import strtobool
from changedetectionio.jinja2_custom import render as jinja_render
@@ -10,15 +9,34 @@ from pathlib import Path
from loguru import logger
from .. import jinja2_custom as safe_jinja
from ..diff import ADDED_PLACEMARKER_OPEN
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
def is_safe_url(test_url):
# See https://github.com/dgtlmoon/changedetection.io/issues/1358
# Remove 'source:' prefix so we dont get 'source:javascript:' etc
# 'source:' is a valid way to tell us to return the source
r = re.compile(re.escape('source:'), re.IGNORECASE)
test_url = r.sub('', test_url)
pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
if not pattern.match(test_url.strip()):
return False
return True
class model(watch_base):
__newest_history_key = None
__history_n = 0
@@ -61,7 +79,7 @@ class model(watch_base):
def link(self):
url = self.get('url', '')
if not is_safe_valid_url(url):
if not is_safe_url(url):
return 'DISABLED'
ready_url = url
@@ -71,8 +89,9 @@ class model(watch_base):
ready_url = jinja_render(template_str=url)
except Exception as e:
logger.critical(f"Invalid URL template for: '{url}' - {str(e)}")
from flask import flash, url_for
from markupsafe import Markup
from flask import (
flash, Markup, url_for
)
message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format(
url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', '')))
flash(message, 'error')
@@ -82,7 +101,7 @@ class model(watch_base):
ready_url=ready_url.replace('source:', '')
# Also double check it after any Jinja2 formatting just incase
if not is_safe_valid_url(ready_url):
if not is_safe_url(ready_url):
return 'DISABLED'
return ready_url
@@ -188,7 +207,7 @@ class model(watch_base):
fname = os.path.join(self.watch_data_dir, "history.txt")
if os.path.isfile(fname):
logger.debug(f"Reading watch history index for {self.get('uuid')}")
with open(fname, "r", encoding='utf-8') as f:
with open(fname, "r") as f:
for i in f.readlines():
if ',' in i:
k, v = i.strip().split(',', 2)
@@ -276,17 +295,9 @@ class model(watch_base):
# When the 'last viewed' timestamp is less than the oldest snapshot, return oldest
return sorted_keys[-1]
def get_history_snapshot(self, timestamp=None, filepath=None):
"""
Accepts either timestamp or filepath
:param timestamp:
:param filepath:
:return:
"""
def get_history_snapshot(self, timestamp):
import brotli
if not filepath:
filepath = self.history[timestamp]
filepath = self.history[timestamp]
# See if a brotli versions exists and switch to that
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
@@ -303,14 +314,6 @@ class model(watch_base):
with open(filepath, 'rb') as f:
return(brotli.decompress(f.read()).decode('utf-8'))
# Check if binary file (image, PDF, etc.) vs text
binary_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.pdf', '.bin')
if any(filepath.endswith(ext) for ext in binary_extensions):
# Binary file - return raw bytes
with open(filepath, 'rb') as f:
return f.read()
# Text file - decode to string
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
return f.read()
@@ -326,34 +329,13 @@ class model(watch_base):
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
# Auto-detect binary vs text data
if isinstance(contents, bytes):
# Binary data (screenshots, images, PDFs, etc.)
# Detect image format from magic bytes for proper extension
if contents[:8] == b'\x89PNG\r\n\x1a\n':
ext = 'png'
elif contents[:3] == b'\xff\xd8\xff':
ext = 'jpg'
elif contents[:6] in (b'GIF87a', b'GIF89a'):
ext = 'gif'
elif contents[:4] == b'RIFF' and contents[8:12] == b'WEBP':
ext = 'webp'
elif contents[:4] == b'%PDF':
ext = 'pdf'
else:
ext = 'bin' # Generic binary
snapshot_fname = f"{snapshot_id}.{ext}"
encoded_data = contents # Already bytes, no encoding needed
logger.trace(f"Saving binary snapshot as {snapshot_fname} ({len(contents)} bytes)")
# Decide on snapshot filename and destination path
if not skip_brotli and len(contents) > threshold:
snapshot_fname = f"{snapshot_id}.txt.br"
encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
else:
# Text data (HTML, JSON, etc.) - apply brotli compression
if not skip_brotli and len(contents) > threshold:
snapshot_fname = f"{snapshot_id}.txt.br"
encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
else:
snapshot_fname = f"{snapshot_id}.txt"
encoded_data = contents.encode('utf-8')
snapshot_fname = f"{snapshot_id}.txt"
encoded_data = contents.encode('utf-8')
dest = os.path.join(self.watch_data_dir, snapshot_fname)
@@ -419,7 +401,7 @@ class model(watch_base):
# Compare each lines (set) against each history text file (set) looking for something new..
existing_history = set({})
for k, v in self.history.items():
content = self.get_history_snapshot(filepath=v)
content = self.get_history_snapshot(k)
if ignore_whitespace:
alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
@@ -623,7 +605,7 @@ class model(watch_base):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error.txt")
if os.path.isfile(fname):
with open(fname, 'r', encoding='utf-8') as f:
with open(fname, 'r') as f:
return f.read()
return False
@@ -676,7 +658,7 @@ class model(watch_base):
for k, fname in self.history.items():
if os.path.isfile(fname):
if True:
contents = self.get_history_snapshot(timestamp=k)
contents = self.get_history_snapshot(k)
res = re.findall(regex, contents, re.MULTILINE)
if res:
if not csv_writer:
@@ -769,7 +751,7 @@ class model(watch_base):
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
dates = list(self.history.keys())
if len(dates):
return self.get_history_snapshot(timestamp=dates[-1])
return self.get_history_snapshot(dates[-1])
else:
return ''
@@ -855,7 +837,6 @@ class model(watch_base):
# has app+request context, we can use url_for()
if has_app_context:
if last_error:
last_error = safe_jinja.render_fully_escaped(last_error)
if '403' in last_error:
if has_proxies:
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a>&nbsp;'")))
@@ -865,9 +846,7 @@ class model(watch_base):
output.append(str(Markup(last_error)))
if self.get('last_notification_error'):
txt = safe_jinja.render_fully_escaped(self.get('last_notification_error'))
result = f'<div class="notification-error"><a href="{url_for("settings.notification_logs")}">{txt}</a></div>'
output.append(result)
output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>")))
else:
# Lo_Fi version - no app context, cant rely on Jinja2 Markup

View File

@@ -2,7 +2,7 @@ import os
import uuid
from changedetectionio import strtobool
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
default_notification_format_for_watch = 'System default'
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
class watch_base(dict):
@@ -44,7 +44,7 @@ class watch_base(dict):
'method': 'GET',
'notification_alert_count': 0,
'notification_body': None,
'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
'notification_format': default_notification_format_for_watch,
'notification_muted': False,
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
'notification_title': None,
@@ -55,6 +55,7 @@ class watch_base(dict):
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
'processor': 'text_json_diff', # could be restock_diff or others from .processors
'price_change_threshold_percent': None,
'price_change_custom_include_filters': None, # Like 'include_filter' but for price changes only
'proxy': None, # Preferred proxy connection
'remote_server_reply': None, # From 'server' reply header
'sort_text_alphabetically': False,

View File

@@ -1,16 +1,18 @@
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from changedetectionio.model import default_notification_format_for_watch
default_notification_format = 'htmlcolor'
ult_notification_format_for_watch = 'System default'
default_notification_format = 'HTML Color'
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
# The values (markdown etc) are from apprise NotifyFormat,
# But to avoid importing the whole heavy module just use the same strings here.
valid_notification_formats = {
'text': 'Plain Text',
'html': 'HTML',
'htmlcolor': 'HTML Color',
'markdown': 'Markdown to HTML',
'Text': 'text',
'Markdown': 'markdown',
'HTML': 'html',
'HTML Color': 'htmlcolor',
# Used only for editing a watch (not for global)
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
default_notification_format_for_watch: default_notification_format_for_watch
}

View File

@@ -1,61 +1,10 @@
"""
Custom Apprise HTTP Handlers with format= Parameter Support
IMPORTANT: This module works around a limitation in Apprise's @notify decorator.
THE PROBLEM:
-------------
When using Apprise's @notify decorator to create custom notification handlers, the
decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse
URLs. This simple parsing mode does NOT extract the format= query parameter from the URL
and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format.
As a result:
1. URL: post://example.com/webhook?format=html
2. Apprise parses this and sees format=html in qsd (query string dictionary)
3. But it does NOT extract it and pass it to NotifyBase.__init__
4. NotifyBase defaults to notify_format=TEXT
5. When you call apobj.notify(body="<html>...", body_format="html"):
- Apprise sees: input format = html, output format (notify_format) = text
- Apprise calls convert_between("html", "text", body)
- This strips all HTML tags, leaving only plain text
6. Your custom handler receives stripped plain text instead of HTML
THE SOLUTION:
-------------
Instead of using the @notify decorator directly, we:
1. Manually register custom plugins using plugins.N_MGR.add()
2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin
3. Override __init__ to extract format= from qsd and set it as kwargs['format']
4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format']
5. Set up _default_args like CustomNotifyPlugin does for compatibility
This ensures that when format=html is in the URL:
- notify_format is set to HTML
- Apprise sees: input format = html, output format = html
- No conversion happens (convert_between returns content unchanged)
- Your custom handler receives the original HTML intact
TESTING:
--------
To verify this works:
>>> apobj = apprise.Apprise()
>>> apobj.add('post://localhost:5005/test?format=html')
>>> for server in apobj:
... print(server.notify_format) # Should print: html (not text)
>>> apobj.notify(body='<span>Test</span>', body_format='html')
# Your handler should receive '<span>Test</span>' not 'Test'
"""
import json
import re
from urllib.parse import unquote_plus
import requests
from apprise import plugins
from apprise.decorators.base import CustomNotifyPlugin
from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly
from apprise.utils.logic import dict_full_update
from apprise.decorators import notify
from apprise.utils.parse import parse_url as apprise_parse_url
from loguru import logger
from requests.structures import CaseInsensitiveDict
@@ -63,66 +12,13 @@ SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}
def notify_supported_methods(func):
"""Register custom HTTP method handlers that properly support format= parameter."""
for method in SUPPORTED_HTTP_METHODS:
_register_http_handler(method, func)
_register_http_handler(f"{method}s", func)
func = notify(on=method)(func)
# Add support for https, for each supported http method
func = notify(on=f"{method}s")(func)
return func
def _register_http_handler(schema, send_func):
"""Register a custom HTTP handler that extracts format= from URL query parameters."""
# Parse base URL
base_url = f"{schema}://"
base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True)
class CustomHTTPHandler(CustomNotifyPlugin):
secure_protocol = schema
service_name = f"Custom HTTP - {schema.upper()}"
_base_args = base_args
def __init__(self, **kwargs):
# Extract format from qsd and set it as a top-level kwarg
# This allows NotifyBase.__init__ to properly set notify_format
if 'qsd' in kwargs and 'format' in kwargs['qsd']:
kwargs['format'] = kwargs['qsd']['format']
# Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__)
super(CustomNotifyPlugin, self).__init__(**kwargs)
# Set up _default_args like CustomNotifyPlugin does
self._default_args = {}
kwargs.pop("secure", None)
dict_full_update(self._default_args, self._base_args)
dict_full_update(self._default_args, kwargs)
self._default_args["url"] = url_assembly(**self._default_args)
__send = staticmethod(send_func)
def send(self, body, title="", notify_type="info", *args, **kwargs):
"""Call the custom send function."""
try:
result = self.__send(
body, title, notify_type,
*args,
meta=self._default_args,
**kwargs
)
return True if result is None else bool(result)
except Exception as e:
self.logger.warning(f"Exception in custom HTTP handler: {e}")
return False
# Register the plugin
plugins.N_MGR.add(
plugin=CustomHTTPHandler,
schemas=schema,
send_func=send_func,
url=base_url,
)
def _get_auth(parsed_url: dict) -> str | tuple[str, str]:
user: str | None = parsed_url.get("user")
password: str | None = parsed_url.get("password")
@@ -174,12 +70,9 @@ def apprise_http_custom_handler(
title: str,
notify_type: str,
meta: dict,
body_format: str = None,
*args,
**kwargs,
) -> bool:
url: str = meta.get("url")
schema: str = meta.get("schema")
method: str = re.sub(r"s$", "", schema).upper()
@@ -195,16 +88,25 @@ def apprise_http_custom_handler(
url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))
response = requests.request(
method=method,
url=url,
auth=auth,
headers=headers,
params=params,
data=body.encode("utf-8") if isinstance(body, str) else body,
)
try:
response = requests.request(
method=method,
url=url,
auth=auth,
headers=headers,
params=params,
data=body.encode("utf-8") if isinstance(body, str) else body,
)
response.raise_for_status()
response.raise_for_status()
logger.info(f"Successfully sent custom notification to {url}")
return True
logger.info(f"Successfully sent custom notification to {url}")
return True
except requests.RequestException as e:
logger.error(f"Remote host error while sending custom notification to {url}: {e}")
return False
except Exception as e:
logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
return False

View File

@@ -1,286 +0,0 @@
"""
Custom Discord plugin for changedetection.io
Extends Apprise's Discord plugin to support custom colored embeds for removed/added content
"""
from apprise.plugins.discord import NotifyDiscord
from apprise.decorators import notify
from apprise.common import NotifyFormat
from loguru import logger
# Import placeholders from changedetection's diff module
from ...diff import (
REMOVED_PLACEMARKER_OPEN,
REMOVED_PLACEMARKER_CLOSED,
ADDED_PLACEMARKER_OPEN,
ADDED_PLACEMARKER_CLOSED,
CHANGED_PLACEMARKER_OPEN,
CHANGED_PLACEMARKER_CLOSED,
CHANGED_INTO_PLACEMARKER_OPEN,
CHANGED_INTO_PLACEMARKER_CLOSED,
)
# Discord embed sidebar colors for different change types
DISCORD_COLOR_UNCHANGED = 8421504 # Gray (#808080)
DISCORD_COLOR_REMOVED = 16711680 # Red (#FF0000)
DISCORD_COLOR_ADDED = 65280 # Green (#00FF00)
DISCORD_COLOR_CHANGED = 16753920 # Orange (#FFA500)
DISCORD_COLOR_CHANGED_INTO = 3447003 # Blue (#5865F2 - Discord blue)
DISCORD_COLOR_WARNING = 16776960 # Yellow (#FFFF00)
class NotifyDiscordCustom(NotifyDiscord):
"""
Custom Discord notification handler that supports multiple colored embeds
for showing removed (red) and added (green) content separately.
"""
def send(self, body, title="", notify_type=None, attach=None, **kwargs):
"""
Override send method to create custom embeds with red/green colors
for removed/added content when placeholders are present.
"""
# Check if body contains our diff placeholders
has_removed = REMOVED_PLACEMARKER_OPEN in body
has_added = ADDED_PLACEMARKER_OPEN in body
has_changed = CHANGED_PLACEMARKER_OPEN in body
has_changed_into = CHANGED_INTO_PLACEMARKER_OPEN in body
# If we have diff placeholders and we're in markdown/html format, create custom embeds
if (has_removed or has_added or has_changed or has_changed_into) and self.notify_format in (NotifyFormat.MARKDOWN, NotifyFormat.HTML):
return self._send_with_colored_embeds(body, title, notify_type, attach, **kwargs)
# Otherwise, use the parent class's default behavior
return super().send(body, title, notify_type, attach, **kwargs)
def _send_with_colored_embeds(self, body, title, notify_type, attach, **kwargs):
"""
Send Discord message with embeds in the original diff order.
Preserves the sequence: unchanged -> removed -> added -> unchanged, etc.
"""
from datetime import datetime, timezone
payload = {
"tts": self.tts,
"wait": self.tts is False,
}
if self.flags:
payload["flags"] = self.flags
# Acquire image_url
image_url = self.image_url(notify_type)
if self.avatar and (image_url or self.avatar_url):
payload["avatar_url"] = self.avatar_url if self.avatar_url else image_url
if self.user:
payload["username"] = self.user
# Associate our thread_id with our message
params = {"thread_id": self.thread_id} if self.thread_id else None
# Build embeds array preserving order
embeds = []
# Add title as plain bold text in message content (not an embed)
if title:
payload["content"] = f"**{title}**"
# Parse the body into ordered chunks
chunks = self._parse_body_into_chunks(body)
# Discord limits:
# - Max 10 embeds per message
# - Max 6000 characters total across all embeds
# - Max 4096 characters per embed description
max_embeds = 10
max_total_chars = 6000
max_embed_description = 4096
# All 10 embed slots are available for content
max_content_embeds = max_embeds
# Start character count
total_chars = 0
# Create embeds from chunks in order (no titles, just color coding)
for chunk_type, content in chunks:
if not content.strip():
continue
# Truncate individual embed description if needed
if len(content) > max_embed_description:
content = content[:max_embed_description - 3] + "..."
# Check if we're approaching the embed count limit
# We need room for the warning embed, so stop at max_content_embeds - 1
current_content_embeds = len(embeds)
if current_content_embeds >= max_content_embeds - 1:
# Add a truncation notice (this will be the 10th embed)
embeds.append({
"description": "⚠️ Content truncated (Discord 10 embed limit reached) - Tip: Select 'Plain Text' or 'HTML' format for longer diffs",
"color": DISCORD_COLOR_WARNING,
})
break
# Check if adding this embed would exceed total character limit
if total_chars + len(content) > max_total_chars:
# Add a truncation notice
remaining_chars = max_total_chars - total_chars
if remaining_chars > 100:
# Add partial content if we have room
truncated_content = content[:remaining_chars - 100] + "..."
embeds.append({
"description": truncated_content,
"color": (DISCORD_COLOR_UNCHANGED if chunk_type == "unchanged"
else DISCORD_COLOR_REMOVED if chunk_type == "removed"
else DISCORD_COLOR_ADDED),
})
embeds.append({
"description": "⚠️ Content truncated (Discord 6000 char limit reached)\nTip: Select 'Plain Text' or 'HTML' format for longer diffs",
"color": DISCORD_COLOR_WARNING,
})
break
if chunk_type == "unchanged":
embeds.append({
"description": content,
"color": DISCORD_COLOR_UNCHANGED,
})
elif chunk_type == "removed":
embeds.append({
"description": content,
"color": DISCORD_COLOR_REMOVED,
})
elif chunk_type == "added":
embeds.append({
"description": content,
"color": DISCORD_COLOR_ADDED,
})
elif chunk_type == "changed":
# Changed (old value) - use orange to distinguish from pure removal
embeds.append({
"description": content,
"color": DISCORD_COLOR_CHANGED,
})
elif chunk_type == "changed_into":
# Changed into (new value) - use blue to distinguish from pure addition
embeds.append({
"description": content,
"color": DISCORD_COLOR_CHANGED_INTO,
})
total_chars += len(content)
if embeds:
payload["embeds"] = embeds
# Send the payload using parent's _send method
if not self._send(payload, params=params):
return False
# Handle attachments if present
if attach and self.attachment_support:
payload.update({
"tts": False,
"wait": True,
})
payload.pop("embeds", None)
payload.pop("content", None)
payload.pop("allow_mentions", None)
for attachment in attach:
self.logger.info(f"Posting Discord Attachment {attachment.name}")
if not self._send(payload, params=params, attach=attachment):
return False
return True
def _parse_body_into_chunks(self, body):
"""
Parse the body into ordered chunks of (type, content) tuples.
Types: "unchanged", "removed", "added", "changed", "changed_into"
Preserves the original order of the diff.
"""
chunks = []
position = 0
while position < len(body):
# Find the next marker
next_removed = body.find(REMOVED_PLACEMARKER_OPEN, position)
next_added = body.find(ADDED_PLACEMARKER_OPEN, position)
next_changed = body.find(CHANGED_PLACEMARKER_OPEN, position)
next_changed_into = body.find(CHANGED_INTO_PLACEMARKER_OPEN, position)
# Determine which marker comes first
if next_removed == -1 and next_added == -1 and next_changed == -1 and next_changed_into == -1:
# No more markers, rest is unchanged
if position < len(body):
chunks.append(("unchanged", body[position:]))
break
# Find the earliest marker
next_marker_pos = None
next_marker_type = None
# Compare all marker positions to find the earliest
markers = []
if next_removed != -1:
markers.append((next_removed, "removed"))
if next_added != -1:
markers.append((next_added, "added"))
if next_changed != -1:
markers.append((next_changed, "changed"))
if next_changed_into != -1:
markers.append((next_changed_into, "changed_into"))
if markers:
next_marker_pos, next_marker_type = min(markers, key=lambda x: x[0])
# Add unchanged content before the marker
if next_marker_pos > position:
chunks.append(("unchanged", body[position:next_marker_pos]))
# Find the closing marker
if next_marker_type == "removed":
open_marker = REMOVED_PLACEMARKER_OPEN
close_marker = REMOVED_PLACEMARKER_CLOSED
elif next_marker_type == "added":
open_marker = ADDED_PLACEMARKER_OPEN
close_marker = ADDED_PLACEMARKER_CLOSED
elif next_marker_type == "changed":
open_marker = CHANGED_PLACEMARKER_OPEN
close_marker = CHANGED_PLACEMARKER_CLOSED
else: # changed_into
open_marker = CHANGED_INTO_PLACEMARKER_OPEN
close_marker = CHANGED_INTO_PLACEMARKER_CLOSED
close_pos = body.find(close_marker, next_marker_pos)
if close_pos == -1:
# No closing marker, take rest as this type
content = body[next_marker_pos + len(open_marker):]
chunks.append((next_marker_type, content))
break
else:
# Extract content between markers
content = body[next_marker_pos + len(open_marker):close_pos]
chunks.append((next_marker_type, content))
position = close_pos + len(close_marker)
return chunks
# Register the custom Discord handler with Apprise
# This will override the built-in discord:// handler
@notify(on="discord")
def discord_custom_wrapper(body, title, notify_type, meta, body_format=None, *args, **kwargs):
"""
Wrapper function to make the custom Discord handler work with Apprise's decorator system.
Note: This decorator approach may not work for overriding built-in plugins.
The class-based approach above is the proper way to extend NotifyDiscord.
"""
logger.info("Custom Discord handler called")
# This is here for potential future use with decorator-based registration
return True

View File

@@ -1,42 +0,0 @@
def as_monospaced_html_email(content: str, title: str) -> str:
"""
Wraps `content` in a minimal, email-safe HTML template
that forces monospace rendering across Gmail, Hotmail, Apple Mail, etc.
Args:
content: The body text (plain text or HTML-like).
title: The title plaintext
Returns:
A complete HTML document string suitable for sending as an email body.
"""
# All line feed types should be removed and then this function should only be fed <br>'s
# Then it works with our <pre> styling without double linefeeds
content = content.translate(str.maketrans('', '', '\r\n'))
if title:
import html
title = html.escape(title)
else:
title = ''
# 2. Full email-safe HTML
html_email = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="x-apple-disable-message-reformatting">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--[if mso]>
<style>
body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }}
</style>
<![endif]-->
<title>{title}</title>
</head>
<body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;">
<pre role="article" aria-roledescription="email" lang="en"
style="font-family: monospace, 'Courier New', Courier; font-size: 0.9rem;
white-space: pre-wrap; word-break: break-word;">{content}</pre>
</body>
</html>"""
return html_email

View File

@@ -1,310 +1,16 @@
import time
import re
import apprise
from apprise import NotifyFormat
from loguru import logger
from urllib.parse import urlparse
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
from .email_helpers import as_monospaced_html_email
from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \
ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \
CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE
import re
from ..notification_service import NotificationContextData, add_rendered_diff_to_notification_vars
newline_re = re.compile(r'\r\n|\r|\n')
def markup_text_links_to_html(body):
"""
Convert plaintext to HTML with clickable links.
Uses Jinja2's escape and Markup for XSS safety.
"""
from linkify_it import LinkifyIt
from markupsafe import Markup, escape
linkify = LinkifyIt()
# Match URLs in the ORIGINAL text (before escaping)
matches = linkify.match(body)
if not matches:
# No URLs, just escape everything
return Markup(escape(body))
result = []
last_index = 0
# Process each URL match
for match in matches:
# Add escaped text before the URL
if match.index > last_index:
text_part = body[last_index:match.index]
result.append(escape(text_part))
# Add the link with escaped URL (both in href and display)
url = match.url
result.append(Markup(f'<a href="{escape(url)}">{escape(url)}</a>'))
last_index = match.last_index
# Add remaining escaped text
if last_index < len(body):
result.append(escape(body[last_index:]))
# Join all parts
return str(Markup(''.join(str(part) for part in result)))
def notification_format_align_with_apprise(n_format : str):
"""
Correctly align changedetection's formats with apprise's formats
Probably these are the same - but good to be sure.
These set the expected OUTPUT format type
:param n_format:
:return:
"""
if n_format.startswith('html'):
# Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here
n_format = NotifyFormat.HTML.value
elif n_format.startswith('markdown'):
# probably the same but just to be safe
n_format = NotifyFormat.MARKDOWN.value
elif n_format.startswith('text'):
# probably the same but just to be safe
n_format = NotifyFormat.TEXT.value
else:
n_format = NotifyFormat.TEXT.value
return n_format
def apply_html_color_to_body(n_body: str):
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN,
f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN,
f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
# Handle changed/replaced lines (old → new)
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN,
f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN,
f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
return n_body
def apply_discord_markdown_to_body(n_body):
"""
Discord does not support <del> but it supports non-standard ~~strikethrough~~
:param n_body:
:return:
"""
import re
# Define the mapping between your placeholders and markdown markers
replacements = [
(REMOVED_PLACEMARKER_OPEN, '~~', REMOVED_PLACEMARKER_CLOSED, '~~'),
(ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
(CHANGED_PLACEMARKER_OPEN, '~~', CHANGED_PLACEMARKER_CLOSED, '~~'),
(CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
]
# So that the markdown gets added without any whitespace following it which would break it
for open_tag, open_md, close_tag, close_md in replacements:
# Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
pattern = re.compile(
re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
flags=re.DOTALL
)
n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
return n_body
def apply_standard_markdown_to_body(n_body):
"""
Apprise does not support ~~strikethrough~~ but it will convert <del> to HTML strikethrough.
:param n_body:
:return:
"""
import re
# Define the mapping between your placeholders and markdown markers
replacements = [
(REMOVED_PLACEMARKER_OPEN, '<del>', REMOVED_PLACEMARKER_CLOSED, '</del>'),
(ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
(CHANGED_PLACEMARKER_OPEN, '<del>', CHANGED_PLACEMARKER_CLOSED, '</del>'),
(CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
]
# So that the markdown gets added without any whitespace following it which would break it
for open_tag, open_md, close_tag, close_md in replacements:
# Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
pattern = re.compile(
re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
flags=re.DOTALL
)
n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
return n_body
def replace_placemarkers_in_text(text, url, requested_output_format):
"""
Replace diff placemarkers in text based on the URL service type and requested output format.
Used for both notification title and body to ensure consistent placeholder replacement.
:param text: The text to process
:param url: The notification URL (to detect service type)
:param requested_output_format: The output format (html, htmlcolor, markdown, text, etc.)
:return: Processed text with placemarkers replaced
"""
if not text:
return text
if url.startswith('tgram://'):
# Telegram only supports a limited subset of HTML
# Use strikethrough for removed content, bold for added content
text = text.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
text = text.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
text = text.replace(ADDED_PLACEMARKER_OPEN, '<b>')
text = text.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
# Handle changed/replaced lines (old → new)
text = text.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
text = text.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
or url.startswith('https://discord.com/api')) and requested_output_format == 'html':
# Discord doesn't support HTML, use Discord markdown
text = apply_discord_markdown_to_body(n_body=text)
elif requested_output_format == 'htmlcolor':
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
text = text.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
text = text.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
text = text.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
text = text.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
# Handle changed/replaced lines (old → new)
text = text.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
elif requested_output_format == 'markdown':
# Markdown to HTML - Apprise will convert this to HTML
text = apply_standard_markdown_to_body(n_body=text)
else:
# plaintext, html, and default - use simple text markers
text = text.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
text = text.replace(REMOVED_PLACEMARKER_CLOSED, '')
text = text.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
text = text.replace(ADDED_PLACEMARKER_CLOSED, '')
text = text.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'')
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
return text
def apply_service_tweaks(url, n_body, n_title, requested_output_format):
logger.debug(f"Applying markup in '{requested_output_format}' mode")
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
# Because different notifications may require different pre-processing, run each sequentially :(
# 2000 bytes minus -
# 200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
# Length of URL - Incase they specify a longer custom avatar_url
if not n_body or not n_body.strip():
return url, n_body, n_title
# Normalize URL scheme to lowercase to prevent case-sensitivity issues
# e.g., "Discord://webhook" -> "discord://webhook", "TGRAM://bot123" -> "tgram://bot123"
scheme_separator_pos = url.find('://')
if scheme_separator_pos > 0:
url = url[:scheme_separator_pos].lower() + url[scheme_separator_pos:]
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
parsed = urlparse(url)
k = '?' if not parsed.query else '&'
if url and not 'avatar_url' in url \
and not url.startswith('mail') \
and not url.startswith('post') \
and not url.startswith('get') \
and not url.startswith('delete') \
and not url.startswith('put'):
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
# Replace placemarkers in title first (this was the missing piece causing the bug)
# Titles are ALWAYS plain text across all notification services (Discord embeds, Slack attachments,
# email Subject headers, etc.), so we always use 'text' format for title placemarker replacement
# Looking over apprise library it seems that all plugins only expect plain-text.
n_title = replace_placemarkers_in_text(n_title, url, 'text')
if url.startswith('tgram://'):
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
# re https://github.com/dgtlmoon/changedetection.io/issues/555
# @todo re-use an existing library we have already imported to strip all non-allowed tags
n_body = n_body.replace('<br>', '\n')
n_body = n_body.replace('</br>', '\n')
n_body = newline_re.sub('\n', n_body)
# Replace placemarkers for body
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
# real limit is 4096, but minus some for extra metadata
payload_max_size = 3600
body_limit = max(0, payload_max_size - len(n_title))
n_title = n_title[0:payload_max_size]
n_body = n_body[0:body_limit]
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
or url.startswith('https://discord.com/api'))\
and 'html' in requested_output_format:
# Discord doesn't support HTML, replace <br> with newlines
n_body = n_body.strip().replace('<br>', '\n')
n_body = n_body.replace('</br>', '\n')
n_body = newline_re.sub('\n', n_body)
# Don't replace placeholders or truncate here - let the custom Discord plugin handle it
# The plugin will use embeds (6000 char limit across all embeds) if placeholders are present,
# or plain content (2000 char limit) otherwise
# Only do placeholder replacement if NOT using htmlcolor (which triggers embeds in custom plugin)
if requested_output_format == 'html':
# No diff placeholders, use Discord markdown for any other formatting
# Use Discord markdown: strikethrough for removed, bold for added
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
# Apply 2000 char limit for plain content
payload_max_size = 1700
body_limit = max(0, payload_max_size - len(n_title))
n_title = n_title[0:payload_max_size]
n_body = n_body[0:body_limit]
# else: our custom Discord plugin will convert any placeholders left over into embeds with color bars
# Is not discord/tgram and they want htmlcolor
elif requested_output_format == 'htmlcolor':
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
n_body = newline_re.sub('<br>\n', n_body)
elif requested_output_format == 'html':
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
n_body = newline_re.sub('<br>\n', n_body)
elif requested_output_format == 'markdown':
# Markdown to HTML - Apprise will convert this to HTML
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
else: #plaintext etc default
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
return url, n_body, n_title
from ..notification_service import NotificationContextData
def process_notification(n_object: NotificationContextData, datastore):
from changedetectionio.jinja2_custom import render as jinja_render
from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
# be sure its registered
from .apprise_plugin.custom_handlers import apprise_http_custom_handler
# Register custom Discord plugin
from .apprise_plugin.discord import NotifyDiscordCustom
if not isinstance(n_object, NotificationContextData):
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
@@ -316,18 +22,15 @@ def process_notification(n_object: NotificationContextData, datastore):
# Insert variables into the notification content
notification_parameters = create_notification_parameters(n_object, datastore)
requested_output_format = n_object.get('notification_format', default_notification_format)
logger.debug(f"Requested notification output format: '{requested_output_format}'")
n_format = valid_notification_formats.get(
n_object.get('notification_format', default_notification_format),
valid_notification_formats[default_notification_format],
)
# If we arrived with 'System default' then look it up
if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
if n_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch:
# Initially text or whatever
requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format)
requested_output_format_original = requested_output_format
# Now clean it up so it fits perfectly with apprise
requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format)
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s")
@@ -342,122 +45,99 @@ def process_notification(n_object: NotificationContextData, datastore):
apobj = apprise.Apprise(debug=True, asset=apprise_asset)
# Override Apprise's built-in Discord plugin with our custom one
# This allows us to use colored embeds for diff content
# First remove the built-in discord plugin, then add our custom one
apprise.plugins.N_MGR.remove('discord')
apprise.plugins.N_MGR.add(NotifyDiscordCustom, schemas='discord')
if not n_object.get('notification_urls'):
return None
n_object.update(add_rendered_diff_to_notification_vars(
notification_scan_text=n_object.get('notification_body', '')+n_object.get('notification_title', ''),
current_snapshot=n_object.get('current_snapshot'),
prev_snapshot=n_object.get('prev_snapshot'),
# Should always be false for 'text' mode or its too hard to read
# But otherwise, this could be some setting
word_diff=False if requested_output_format_original == 'text' else True,
)
)
with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
for url in n_object['notification_urls']:
# Get the notification body from datastore
n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters)
if n_object.get('notification_format', '').startswith('HTML'):
n_body = n_body.replace("\n", '<br>')
n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)
if n_object.get('markup_text_links_to_html_links'):
n_body = markup_text_links_to_html(body=n_body)
url = url.strip()
if not url or url.startswith('#'):
logger.debug(f"Skipping commented out or empty notification URL - '{url}'")
if url.startswith('#'):
logger.trace(f"Skipping commented out notification URL - {url}")
continue
logger.info(f">> Process Notification: AppRise start notifying '{url}'")
if not url:
logger.warning(f"Process Notification: skipping empty notification URL.")
continue
logger.info(f">> Process Notification: AppRise notifying {url}")
url = jinja_render(template_str=url, **notification_parameters)
# If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped
watch_mime_type = n_object.get('watch_mime_type')
if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower():
if 'html' in requested_output_format:
from markupsafe import escape
n_body = str(escape(n_body))
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
# Because different notifications may require different pre-processing, run each sequentially :(
# 2000 bytes minus -
# 200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
# Length of URL - Incase they specify a longer custom avatar_url
if 'html' in requested_output_format:
# Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output
# But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and&nbsp;this" etc which is too much.
n_body = n_body.replace(' ', '&nbsp;&nbsp;')
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
k = '?' if not '?' in url else '&'
if not 'avatar_url' in url \
and not url.startswith('mail') \
and not url.startswith('post') \
and not url.startswith('get') \
and not url.startswith('delete') \
and not url.startswith('put'):
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
(url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original)
if url.startswith('tgram://'):
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
# re https://github.com/dgtlmoon/changedetection.io/issues/555
# @todo re-use an existing library we have already imported to strip all non-allowed tags
n_body = n_body.replace('<br>', '\n')
n_body = n_body.replace('</br>', '\n')
# real limit is 4096, but minus some for extra metadata
payload_max_size = 3600
body_limit = max(0, payload_max_size - len(n_title))
n_title = n_title[0:payload_max_size]
n_body = n_body[0:body_limit]
apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS"
elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith(
'https://discord.com/api'):
# real limit is 2000, but minus some for extra metadata
payload_max_size = 1700
body_limit = max(0, payload_max_size - len(n_title))
n_title = n_title[0:payload_max_size]
n_body = n_body[0:body_limit]
if not 'format=' in url:
parsed_url = urlparse(url)
prefix_add_to_url = '?' if not parsed_url.query else '&'
elif url.startswith('mailto'):
# Apprise will default to HTML, so we need to override it
# So that whats' generated in n_body is in line with what is going to be sent.
# https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
if not 'format=' in url and (n_format == 'Text' or n_format == 'Markdown'):
prefix = '?' if not '?' in url else '&'
# Apprise format is lowercase text https://github.com/caronc/apprise/issues/633
n_format = n_format.lower()
url = f"{url}{prefix}format={n_format}"
# If n_format == HTML, then apprise email should default to text/html and we should be sending HTML only
# THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC
if 'html' in requested_output_format:
url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
apprise_input_format = NotifyFormat.HTML.value
elif 'text' in requested_output_format:
url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}"
apprise_input_format = NotifyFormat.TEXT.value
apobj.add(url)
elif requested_output_format == NotifyFormat.MARKDOWN.value:
# Convert markdown to HTML ourselves since not all plugins do this
from apprise.conversion import markdown_to_html
# Make sure there are paragraph breaks around horizontal rules
n_body = n_body.replace('---', '\n\n---\n\n')
n_body = markdown_to_html(n_body)
url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
requested_output_format = NotifyFormat.HTML.value
apprise_input_format = NotifyFormat.HTML.value # Changed from MARKDOWN to HTML
else:
# ?format was IN the apprise URL, they are kind of on their own here, we will try our best
if 'format=html' in url:
n_body = newline_re.sub('<br>\r\n', n_body)
# This will also prevent apprise from doing conversion
apprise_input_format = NotifyFormat.HTML.value
requested_output_format = NotifyFormat.HTML.value
elif 'format=text' in url:
apprise_input_format = NotifyFormat.TEXT.value
requested_output_format = NotifyFormat.TEXT.value
#@todo on null:// (only if its a 1 url with null) probably doesnt need to actually .add/setup/etc
sent_objs.append({'title': n_title,
'body': n_body,
'url': url,
# So that we can do a null:// call and get back exactly what would have been sent
'original_context': n_object })
'body_format': n_format})
if not url.startswith('null://'):
apobj.add(url)
# Blast off the notifications tht are set in .add()
apobj.notify(
title=n_title,
body=n_body,
body_format=n_format,
# False is not an option for AppRise, must be type None
attach=n_object.get('screenshot', None)
)
# Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely.
# It should always be similar to the 'history' part of the UI.
if url.startswith('mail') and 'html' in requested_output_format:
if not '<pre' in n_body and not '<body' in n_body: # No custom HTML-ish body was setup already
n_body = as_monospaced_html_email(content=n_body, title=n_title)
if not url.startswith('null://'):
apobj.notify(
title=n_title,
body=n_body,
# `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise
# &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between)
body_format=apprise_input_format,
# False is not an option for AppRise, must be type None
attach=n_object.get('screenshot', None)
)
# Returns empty string if nothing found, multi-line string otherwise
log_value = logs.getvalue()
if log_value and ('WARNING' in log_value or 'ERROR' in log_value):
if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
logger.critical(log_value)
raise Exception(log_value)
@@ -471,8 +151,6 @@ def create_notification_parameters(n_object: NotificationContextData, datastore)
if not isinstance(n_object, NotificationContextData):
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
ext_base_url = datastore.data['settings']['application'].get('active_base_url').strip('/')+'/'
watch = datastore.data['watching'].get(n_object['uuid'])
if watch:
watch_title = datastore.data['watching'][n_object['uuid']].label
@@ -486,29 +164,20 @@ def create_notification_parameters(n_object: NotificationContextData, datastore)
watch_title = 'Change Detection'
watch_tag = ''
# Create URLs to customise the notification with
# active_base_url - set in store.py data property
base_url = datastore.data['settings']['application'].get('active_base_url')
watch_url = n_object['watch_url']
# Build URLs manually instead of using url_for() to avoid requiring a request context
# This allows notifications to be processed in background threads
uuid = n_object['uuid']
diff_url = "{}/diff/{}".format(base_url, n_object['uuid'])
preview_url = "{}/preview/{}".format(base_url, n_object['uuid'])
if n_object.get('timestamp_from') and n_object.get('timestamp_to'):
# Include a link to the diff page with specific versions
diff_url = f"{ext_base_url}diff/{uuid}?from_version={n_object['timestamp_from']}&to_version={n_object['timestamp_to']}"
else:
diff_url = f"{ext_base_url}diff/{uuid}"
preview_url = f"{ext_base_url}preview/{uuid}"
edit_url = f"{ext_base_url}edit/{uuid}"
# @todo test that preview_url is correct when running in not-null mode?
# if not, first time app loads i think it can set a flask context
n_object.update(
{
'base_url': ext_base_url,
'base_url': base_url,
'diff_url': diff_url,
'preview_url': preview_url, #@todo include 'version='
'edit_url': edit_url, #@todo also pause, also mute link
'preview_url': preview_url,
'watch_tag': watch_tag if watch_tag is not None else '',
'watch_title': watch_title if watch_title is not None else '',
'watch_url': watch_url,

View File

@@ -5,83 +5,30 @@ Notification Service Module
Extracted from update_worker.py to provide standalone notification functionality
for both sync and async workers
"""
import datetime
import pytz
from loguru import logger
import time
from changedetectionio.notification import default_notification_format, valid_notification_formats
def _check_cascading_vars(datastore, var_name, watch):
"""
Check notification variables in cascading priority:
Individual watch settings > Tag settings > Global settings
"""
from changedetectionio.notification import (
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
default_notification_body,
default_notification_title
)
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
v = watch.get(var_name)
if v and not watch.get('notification_muted'):
if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
return datastore.data['settings']['application'].get('notification_format')
return v
tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
if tags:
for tag_uuid, tag in tags.items():
v = tag.get(var_name)
if v and not tag.get('notification_muted'):
return v
if datastore.data['settings']['application'].get(var_name):
return datastore.data['settings']['application'].get(var_name)
# Otherwise could be defaults
if var_name == 'notification_format':
return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
if var_name == 'notification_body':
return default_notification_body
if var_name == 'notification_title':
return default_notification_title
return None
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
class NotificationContextData(dict):
def __init__(self, initial_data=None, **kwargs):
super().__init__({
'base_url': None,
'current_snapshot': None,
'diff': None,
'diff_clean': None,
'diff_added': None,
'diff_added_clean': None,
'diff_full': None,
'diff_full_clean': None,
'diff_patch': None,
'diff_removed': None,
'diff_removed_clean': None,
'diff_url': None,
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
'notification_timestamp': time.time(),
'preview_url': None,
'screenshot': None,
'triggered_text': None,
'timestamp_from': None,
'timestamp_to': None,
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
'watch_mime_type': None,
'watch_tag': None,
'watch_title': None,
'watch_url': 'https://WATCH-PLACE-HOLDER/',
'base_url': None,
'diff_url': None,
'preview_url': None,
'watch_tag': None,
'watch_title': None
})
# Apply any initial data passed in
@@ -93,126 +40,15 @@ class NotificationContextData(dict):
if kwargs:
self.update(kwargs)
n_format = self.get('notification_format')
if n_format and not valid_notification_formats.get(n_format):
raise ValueError(f'Invalid notification format: "{n_format}"')
def set_random_for_validation(self):
import random, string
"""Randomly fills all dict keys with random strings (for validation/testing).
So we can test the output in the notification body
"""
"""Randomly fills all dict keys with random strings (for validation/testing)."""
for key in self.keys():
if key in ['uuid', 'time', 'watch_uuid']:
continue
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
self[key] = rand_str
def __setitem__(self, key, value):
if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'):
if not valid_notification_formats.get(value):
raise ValueError(f'Invalid notification format: "{value}"')
super().__setitem__(key, value)
def timestamp_to_localtime(timestamp):
# Format the date using locale-aware formatting with timezone
dt = datetime.datetime.fromtimestamp(int(timestamp))
dt = dt.replace(tzinfo=pytz.UTC)
# Get local timezone-aware datetime
local_tz = datetime.datetime.now().astimezone().tzinfo
local_dt = dt.astimezone(local_tz)
# Format date with timezone - using strftime for locale awareness
try:
formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
except:
# Fallback if locale issues
formatted_date = local_dt.isoformat()
return formatted_date
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
"""
Efficiently renders only the diff placeholders that are actually used in the notification text.
Scans the notification template for diff placeholder usage (diff, diff_added, diff_clean, etc.)
and only renders those specific variants, avoiding expensive render_diff() calls for unused placeholders.
Uses LRU caching to avoid duplicate renders when multiple placeholders share the same arguments.
Args:
notification_scan_text: The notification template text to scan for placeholders
prev_snapshot: Previous version of content for diff comparison
current_snapshot: Current version of content for diff comparison
word_diff: Whether to use word-level (True) or line-level (False) diffing
Returns:
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
"""
from changedetectionio import diff
import re
from functools import lru_cache
now = time.time()
# Define specifications for each diff variant
diff_specs = {
'diff': {'word_diff': word_diff},
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
'diff_added': {'word_diff': word_diff, 'include_removed': False},
'diff_added_clean': {'word_diff': word_diff, 'include_removed': False, 'include_change_type_prefix': False},
'diff_full': {'word_diff': word_diff, 'include_equal': True},
'diff_full_clean': {'word_diff': word_diff, 'include_equal': True, 'include_change_type_prefix': False},
'diff_patch': {'word_diff': word_diff, 'patch_format': True},
'diff_removed': {'word_diff': word_diff, 'include_added': False},
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
}
# Memoize render_diff to avoid duplicate renders with same kwargs
@lru_cache(maxsize=4)
def cached_render(kwargs_tuple):
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
ret = {}
rendered_count = 0
# Only check and render diff keys that exist in NotificationContextData
for key in NotificationContextData().keys():
if key.startswith('diff') and key in diff_specs:
# Check if this placeholder is actually used in the notification text
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
if re.search(pattern, notification_scan_text, re.IGNORECASE):
kwargs = diff_specs[key]
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
ret[key] = cached_render(tuple(sorted(kwargs.items())))
rendered_count += 1
if rendered_count:
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
return ret
def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggered_text, timestamp_changed=None):
n_object = {
'current_snapshot': current_snapshot,
'prev_snapshot': prev_snapshot,
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
'triggered_text': triggered_text,
'uuid': watch.get('uuid') if watch else None,
'watch_url': watch.get('url') if watch else None,
'watch_uuid': watch.get('uuid') if watch else None,
'watch_mime_type': watch.get('content-type')
}
# The \n's in the content from the above will get converted to <br> etc depending on the notification format
if watch:
n_object.update(watch.extra_notification_token_values())
return n_object
class NotificationService:
"""
Standalone notification service that handles all notification functionality
@@ -223,11 +59,12 @@ class NotificationService:
self.datastore = datastore
self.notification_q = notification_q
def queue_notification_for_watch(self, n_object: NotificationContextData, watch, date_index_from=-2, date_index_to=-1):
def queue_notification_for_watch(self, n_object: NotificationContextData, watch):
"""
Queue a notification for a watch with full diff rendering and template variables
"""
from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from changedetectionio import diff
from changedetectionio.notification import default_notification_format_for_watch
if not isinstance(n_object, NotificationContextData):
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
@@ -235,6 +72,8 @@ class NotificationService:
dates = []
trigger_text = ''
now = time.time()
if watch:
watch_history = watch.history
dates = list(watch_history.keys())
@@ -242,43 +81,102 @@ class NotificationService:
# Add text that was triggered
if len(dates):
snapshot_contents = watch.get_history_snapshot(timestamp=dates[-1])
snapshot_contents = watch.get_history_snapshot(dates[-1])
else:
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
# If we ended up here with "System default"
if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
if n_object.get('notification_format') == default_notification_format_for_watch:
n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')
html_colour_enable = False
# HTML needs linebreak, but MarkDown and Text can use a linefeed
if n_object.get('notification_format') == 'HTML':
line_feed_sep = "<br>"
# Snapshot will be plaintext on the disk, convert to some kind of HTML
snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
elif n_object.get('notification_format') == 'HTML Color':
line_feed_sep = "<br>"
# Snapshot will be plaintext on the disk, convert to some kind of HTML
snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
html_colour_enable = True
else:
line_feed_sep = "\n"
triggered_text = ''
if len(trigger_text):
from . import html_tools
triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
if triggered_text:
triggered_text = '\n'.join(triggered_text)
triggered_text = line_feed_sep.join(triggered_text)
# Could be called as a 'test notification' with only 1 snapshot available
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
if len(dates) > 1:
prev_snapshot = watch.get_history_snapshot(timestamp=dates[date_index_from])
current_snapshot = watch.get_history_snapshot(timestamp=dates[date_index_to])
prev_snapshot = watch.get_history_snapshot(dates[-2])
current_snapshot = watch.get_history_snapshot(dates[-1])
n_object.update({
'current_snapshot': snapshot_contents,
'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
'triggered_text': triggered_text,
'uuid': watch.get('uuid') if watch else None,
'watch_url': watch.get('url') if watch else None,
'watch_uuid': watch.get('uuid') if watch else None,
})
n_object.update(set_basic_notification_vars(current_snapshot=current_snapshot,
prev_snapshot=prev_snapshot,
watch=watch,
triggered_text=triggered_text,
timestamp_changed=dates[date_index_to]))
if watch:
n_object.update(watch.extra_notification_token_values())
if self.notification_q:
logger.debug("Queued notification for sending")
self.notification_q.put(n_object)
else:
logger.debug("Not queued, no queue defined. Just returning processed data")
return n_object
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
logger.debug("Queued notification for sending")
self.notification_q.put(n_object)
def _check_cascading_vars(self, var_name, watch):
"""
Check notification variables in cascading priority:
Individual watch settings > Tag settings > Global settings
"""
from changedetectionio.notification import (
default_notification_format_for_watch,
default_notification_body,
default_notification_title
)
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
v = watch.get(var_name)
if v and not watch.get('notification_muted'):
if var_name == 'notification_format' and v == default_notification_format_for_watch:
return self.datastore.data['settings']['application'].get('notification_format')
return v
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
if tags:
for tag_uuid, tag in tags.items():
v = tag.get(var_name)
if v and not tag.get('notification_muted'):
return v
if self.datastore.data['settings']['application'].get(var_name):
return self.datastore.data['settings']['application'].get(var_name)
# Otherwise could be defaults
if var_name == 'notification_format':
return default_notification_format_for_watch
if var_name == 'notification_body':
return default_notification_body
if var_name == 'notification_title':
return default_notification_title
return None
def send_content_changed_notification(self, watch_uuid):
"""
@@ -301,11 +199,10 @@ class NotificationService:
# Should be a better parent getter in the model object
# Prefer - Individual watch settings > Tag settings > Global settings (in that order)
# this change probably not needed?
n_object['notification_urls'] = _check_cascading_vars(self.datastore, 'notification_urls', watch)
n_object['notification_title'] = _check_cascading_vars(self.datastore,'notification_title', watch)
n_object['notification_body'] = _check_cascading_vars(self.datastore,'notification_body', watch)
n_object['notification_format'] = _check_cascading_vars(self.datastore,'notification_format', watch)
n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch)
n_object['notification_title'] = self._check_cascading_vars('notification_title', watch)
n_object['notification_body'] = self._check_cascading_vars('notification_body', watch)
n_object['notification_format'] = self._check_cascading_vars('notification_format', watch)
# (Individual watch) Only prepare to notify if the rules above matched
queued = False
@@ -328,25 +225,13 @@ class NotificationService:
if not watch:
return
filter_list = ", ".join(watch['include_filters'])
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
body = f"""Hello,
Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts.
It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab )
Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}
Thanks - Your omniscient changedetection.io installation.
"""
n_object = NotificationContextData({
'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
'notification_body': body,
'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
", ".join(watch['include_filters']),
threshold),
'notification_format': 'text'
})
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
if len(watch['notification_urls']):
n_object['notification_urls'] = watch['notification_urls']
@@ -374,28 +259,14 @@ Thanks - Your omniscient changedetection.io installation.
if not watch:
return
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
step = step_n + 1
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
# {{{{ }}}} because this will be Jinja2 {{ }} tokens
body = f"""Hello,
Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout?
The element may have moved and needs editing, or does it need a delay added?
Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}
Thanks - Your omniscient changedetection.io installation.
"""
n_object = NotificationContextData({
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
'notification_body': body,
'notification_format': self._check_cascading_vars('notification_format', watch),
'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1),
'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} "
"did not appear on the page after {} attempts, did the page change layout? "
"Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n"
"Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold),
'notification_format': 'text'
})
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
if len(watch['notification_urls']):
n_object['notification_urls'] = watch['notification_urls']

View File

@@ -2,7 +2,6 @@ import pluggy
import os
import importlib
import sys
from loguru import logger
# Global plugin namespace for changedetection.io
PLUGIN_NAMESPACE = "changedetectionio"
@@ -17,94 +16,15 @@ class ChangeDetectionSpec:
@hookspec
def ui_edit_stats_extras(watch):
"""Return HTML content to add to the stats tab in the edit view.
Args:
watch: The watch object being edited
Returns:
str: HTML content to be inserted in the stats tab
"""
pass
@hookspec
def register_content_fetcher(self):
"""Return a tuple of (fetcher_name, fetcher_class) for content fetcher plugins.
The fetcher_name should start with 'html_' and the fetcher_class
should inherit from changedetectionio.content_fetchers.base.Fetcher
Returns:
tuple: (str: fetcher_name, class: fetcher_class)
"""
pass
@hookspec
def fetcher_status_icon(fetcher_name):
"""Return status icon HTML attributes for a content fetcher.
Args:
fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')
Returns:
str: HTML string containing <img> tags or other status icon elements
Empty string if no custom status icon is needed
"""
pass
@hookspec
def plugin_static_path(self):
"""Return the path to the plugin's static files directory.
Returns:
str: Absolute path to the plugin's static directory, or None if no static files
"""
pass
@hookspec
def get_itemprop_availability_override(self, content, fetcher_name, fetcher_instance, url):
"""Provide custom implementation of get_itemprop_availability for a specific fetcher.
This hook allows plugins to provide their own product availability detection
when their fetcher is being used. This is called as a fallback when the built-in
method doesn't find good data.
Args:
content: The HTML/text content to parse
fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
fetcher_instance: The fetcher instance that generated the content
url: The URL being watched/checked
Returns:
dict or None: Dictionary with availability data:
{
'price': float or None,
'availability': str or None, # e.g., 'in stock', 'out of stock'
'currency': str or None, # e.g., 'USD', 'EUR'
}
Or None if this plugin doesn't handle this fetcher or couldn't extract data
"""
pass
@hookspec
def plugin_settings_tab(self):
"""Return settings tab information for this plugin.
This hook allows plugins to add their own settings tab to the settings page.
Settings will be saved to a separate JSON file in the datastore directory.
Returns:
dict or None: Dictionary with settings tab information:
{
'plugin_id': str, # Unique identifier (e.g., 'zyte_fetcher')
'tab_label': str, # Display name for tab (e.g., 'Zyte Fetcher')
'form_class': Form, # WTForms Form class for the settings
'template_path': str, # Optional: path to Jinja2 template (relative to plugin)
# If not provided, a default form renderer will be used
}
Or None if this plugin doesn't provide settings
"""
pass
# Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -145,328 +65,18 @@ load_plugins_from_directories()
# Discover installed plugins from external packages (if any)
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
# Function to inject datastore into plugins that need it
def inject_datastore_into_plugins(datastore):
"""Inject the global datastore into plugins that need access to settings.
This should be called after plugins are loaded and datastore is initialized.
Args:
datastore: The global ChangeDetectionStore instance
"""
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
# Check if plugin has datastore attribute and it's not set
if hasattr(plugin_obj, 'datastore'):
if plugin_obj.datastore is None:
plugin_obj.datastore = datastore
logger.debug(f"Injected datastore into plugin: {plugin_name}")
# Function to register built-in fetchers - called later from content_fetchers/__init__.py
def register_builtin_fetchers():
"""Register built-in content fetchers as internal plugins
This is called from content_fetchers/__init__.py after all fetchers are imported
to avoid circular import issues.
"""
from changedetectionio.content_fetchers import requests, playwright, puppeteer, webdriver_selenium
# Register each built-in fetcher plugin
if hasattr(requests, 'requests_plugin'):
plugin_manager.register(requests.requests_plugin, 'builtin_requests')
if hasattr(playwright, 'playwright_plugin'):
plugin_manager.register(playwright.playwright_plugin, 'builtin_playwright')
if hasattr(puppeteer, 'puppeteer_plugin'):
plugin_manager.register(puppeteer.puppeteer_plugin, 'builtin_puppeteer')
if hasattr(webdriver_selenium, 'webdriver_selenium_plugin'):
plugin_manager.register(webdriver_selenium.webdriver_selenium_plugin, 'builtin_webdriver_selenium')
# Helper function to collect UI stats extras from all plugins
def collect_ui_edit_stats_extras(watch):
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
extras_content = []
# Get all plugins that implement the ui_edit_stats_extras hook
results = plugin_manager.hook.ui_edit_stats_extras(watch=watch)
# If we have results, add them to our content
if results:
for result in results:
if result: # Skip empty results
extras_content.append(result)
return "\n".join(extras_content) if extras_content else ""
def collect_fetcher_status_icons(fetcher_name):
"""Collect status icon data from all plugins
Args:
fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')
Returns:
dict or None: Icon data dictionary from first matching plugin, or None
"""
# Get status icon data from plugins
results = plugin_manager.hook.fetcher_status_icon(fetcher_name=fetcher_name)
# Return first non-None result
if results:
for result in results:
if result and isinstance(result, dict):
return result
return None
def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instance, url):
"""Get itemprop availability data from plugins as a fallback.
This is called when the built-in get_itemprop_availability doesn't find good data.
Args:
content: The HTML/text content to parse
fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
fetcher_instance: The fetcher instance that generated the content
url: The URL being watched (watch.link - includes Jinja2 evaluation)
Returns:
dict or None: Availability data dictionary from first matching plugin, or None
"""
# Get availability data from plugins
results = plugin_manager.hook.get_itemprop_availability_override(
content=content,
fetcher_name=fetcher_name,
fetcher_instance=fetcher_instance,
url=url
)
# Return first non-None result with actual data
if results:
for result in results:
if result and isinstance(result, dict):
# Check if the result has any meaningful data
if result.get('price') is not None or result.get('availability'):
return result
return None
def get_active_plugins():
"""Get a list of active plugins with their descriptions.
Returns:
list: List of dictionaries with plugin information:
[
{'name': 'plugin_name', 'description': 'Plugin description'},
...
]
"""
active_plugins = []
# Get all registered plugins
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
# Skip built-in plugins (they start with 'builtin_')
if plugin_name.startswith('builtin_'):
continue
# Get plugin description if available
description = None
if hasattr(plugin_obj, '__doc__') and plugin_obj.__doc__:
description = plugin_obj.__doc__.strip().split('\n')[0] # First line only
elif hasattr(plugin_obj, 'description'):
description = plugin_obj.description
# Try to get a friendly name from the plugin
friendly_name = plugin_name
if hasattr(plugin_obj, 'name'):
friendly_name = plugin_obj.name
active_plugins.append({
'name': friendly_name,
'description': description or 'No description available'
})
return active_plugins
def get_fetcher_capabilities(watch, datastore):
"""Get capability flags for a watch's fetcher.
Args:
watch: The watch object/dict
datastore: The datastore to resolve 'system' fetcher
Returns:
dict: Dictionary with capability flags:
{
'supports_browser_steps': bool,
'supports_screenshots': bool,
'supports_xpath_element_data': bool
}
"""
# Get the fetcher name from watch
fetcher_name = watch.get('fetch_backend', 'system')
# Resolve 'system' to actual fetcher
if fetcher_name == 'system':
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
# Get the fetcher class
from changedetectionio import content_fetchers
# Try to get from built-in fetchers first
if hasattr(content_fetchers, fetcher_name):
fetcher_class = getattr(content_fetchers, fetcher_name)
return {
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
}
# Try to get from plugin-provided fetchers
# Query all plugins for registered fetchers
plugin_fetchers = plugin_manager.hook.register_content_fetcher()
for fetcher_registration in plugin_fetchers:
if fetcher_registration:
name, fetcher_class = fetcher_registration
if name == fetcher_name:
return {
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
}
# Default: no capabilities
return {
'supports_browser_steps': False,
'supports_screenshots': False,
'supports_xpath_element_data': False
}
def get_plugin_settings_tabs():
"""Get all plugin settings tabs.
Returns:
list: List of dictionaries with plugin settings tab information:
[
{
'plugin_id': str,
'tab_label': str,
'form_class': Form,
'description': str
},
...
]
"""
tabs = []
results = plugin_manager.hook.plugin_settings_tab()
for result in results:
if result and isinstance(result, dict):
# Validate required fields
if 'plugin_id' in result and 'tab_label' in result and 'form_class' in result:
tabs.append(result)
else:
logger.warning(f"Invalid plugin settings tab spec: {result}")
return tabs
def load_plugin_settings(datastore_path, plugin_id):
"""Load settings for a specific plugin from JSON file.
Args:
datastore_path: Path to the datastore directory
plugin_id: Unique identifier for the plugin (e.g., 'zyte_fetcher')
Returns:
dict: Plugin settings, or empty dict if file doesn't exist
"""
import json
settings_file = os.path.join(datastore_path, f"{plugin_id}.json")
if not os.path.exists(settings_file):
return {}
try:
with open(settings_file, 'r', encoding='utf-8') as f:
return json.load(f)
except Exception as e:
logger.error(f"Failed to load settings for plugin '{plugin_id}': {e}")
return {}
def save_plugin_settings(datastore_path, plugin_id, settings):
"""Save settings for a specific plugin to JSON file.
Args:
datastore_path: Path to the datastore directory
plugin_id: Unique identifier for the plugin (e.g., 'zyte_fetcher')
settings: Dictionary of settings to save
Returns:
bool: True if save was successful, False otherwise
"""
import json
settings_file = os.path.join(datastore_path, f"{plugin_id}.json")
try:
with open(settings_file, 'w', encoding='utf-8') as f:
json.dump(settings, f, indent=2, ensure_ascii=False)
logger.info(f"Saved settings for plugin '{plugin_id}' to {settings_file}")
return True
except Exception as e:
logger.error(f"Failed to save settings for plugin '{plugin_id}': {e}")
return False
def get_plugin_template_paths():
"""Get list of plugin template directories for Jinja2 loader.
Scans both external pluggy plugins and built-in processor plugins.
Returns:
list: List of absolute paths to plugin template directories
"""
template_paths = []
# Scan built-in processor plugins
from changedetectionio.processors import find_processors
processor_list = find_processors()
for processor_module, processor_name in processor_list:
# Each processor is a module, check if it has a templates directory
if hasattr(processor_module, '__file__'):
processor_file = processor_module.__file__
if processor_file:
# Get the processor directory (e.g., processors/image_ssim_diff/)
processor_dir = os.path.dirname(os.path.abspath(processor_file))
templates_dir = os.path.join(processor_dir, 'templates')
if os.path.isdir(templates_dir):
template_paths.append(templates_dir)
logger.debug(f"Added processor template path: {templates_dir}")
# Get all registered external pluggy plugins
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
# Check if plugin has a templates directory
if hasattr(plugin_obj, '__file__'):
plugin_file = plugin_obj.__file__
elif hasattr(plugin_obj, '__module__'):
# Get the module file
module = sys.modules.get(plugin_obj.__module__)
if module and hasattr(module, '__file__'):
plugin_file = module.__file__
else:
continue
else:
continue
if plugin_file:
plugin_dir = os.path.dirname(os.path.abspath(plugin_file))
templates_dir = os.path.join(plugin_dir, 'templates')
if os.path.isdir(templates_dir):
template_paths.append(templates_dir)
logger.debug(f"Added plugin template path: {templates_dir}")
return template_paths
return "\n".join(extras_content) if extras_content else ""

View File

@@ -23,7 +23,6 @@ class difference_detection_processor():
def __init__(self, *args, datastore, watch_uuid, **kwargs):
super().__init__(*args, **kwargs)
self.datastore = datastore
self.watch_uuid = watch_uuid
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
# Generic fetcher that should be extended (requests, playwright etc)
self.fetcher = Fetcher()
@@ -92,8 +91,6 @@ class difference_detection_processor():
else:
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
self.fetcher = fetcher_obj(proxy_override=proxy_url,
@@ -161,7 +158,6 @@ class difference_detection_processor():
request_method=request_method,
timeout=timeout,
url=url,
watch_uuid=self.watch_uuid,
)
#@todo .quit here could go on close object, so we can run JS if change-detected
@@ -169,66 +165,6 @@ class difference_detection_processor():
# After init, call run_changedetection() which will do the actual change-detection
def get_extra_watch_config(self, filename):
"""
Read processor-specific JSON config file from watch data directory.
Args:
filename: Name of JSON file (e.g., "visual_ssim_score.json")
Returns:
dict: Parsed JSON data, or empty dict if file doesn't exist
"""
import json
import os
watch = self.datastore.data['watching'].get(self.watch_uuid)
watch_data_dir = watch.watch_data_dir
if not watch_data_dir:
return {}
filepath = os.path.join(watch_data_dir, filename)
if not os.path.isfile(filepath):
return {}
try:
with open(filepath, 'r', encoding='utf-8') as f:
return json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.warning(f"Failed to read extra watch config {filename}: {e}")
return {}
def update_extra_watch_config(self, filename, data):
"""
Write processor-specific JSON config file to watch data directory.
Args:
filename: Name of JSON file (e.g., "visual_ssim_score.json")
data: Dictionary to serialize as JSON
"""
import json
import os
watch = self.datastore.data['watching'].get(self.watch_uuid)
watch_data_dir = watch.watch_data_dir
if not watch_data_dir:
logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
return
# Ensure directory exists
watch.ensure_data_dir_exists()
filepath = os.path.join(watch_data_dir, filename)
try:
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2)
except IOError as e:
logger.error(f"Failed to write extra watch config {filename}: {e}")
@abstractmethod
def run_changedetection(self, watch):
update_obj = {'last_notification_error': False, 'last_error': False}
@@ -306,41 +242,15 @@ def get_custom_watch_obj_for_processor(processor_name):
def available_processors():
"""
Get a list of processors by name and description for the UI elements.
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
Get a list of processors by name and description for the UI elements
:return: A list :)
"""
processor_classes = find_processors()
# Check if ALLOWED_PROCESSORS env var is set
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', '').strip()
allowed_processors = None
if allowed_processors_env:
# Parse comma-separated list and strip whitespace
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
available = []
for module, sub_package_name in processor_classes:
# Filter by allowed processors if set
if allowed_processors and sub_package_name not in allowed_processors:
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
continue
# Try to get the 'name' attribute from the processor module first
if hasattr(module, 'name'):
description = module.name
else:
# Fall back to processor_description from parent module's __init__.py
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'processor_description'):
description = parent_module.processor_description
else:
# Final fallback to a readable name
description = sub_package_name.replace('_', ' ').title()
available.append((sub_package_name, description))
for package, processor_class in processor_classes:
available.append((processor_class, package.name))
return available

View File

@@ -1,131 +0,0 @@
"""
Base data extraction module for all processors.
This module handles extracting data from watch history using regex patterns
and exporting to CSV format. This is the default extractor that all processors
(text_json_diff, restock_diff, etc.) can use by default or override.
"""
import os
from loguru import logger
def render_form(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
"""
Render the data extraction form.
Args:
watch: The watch object
datastore: The ChangeDetectionStore instance
request: Flask request object
url_for: Flask url_for function
render_template: Flask render_template function
flash: Flask flash function
redirect: Flask redirect function
extract_form: Optional pre-built extract form (for error cases)
Returns:
Rendered HTML response with the extraction form
"""
from changedetectionio import forms
uuid = watch.get('uuid')
# Use provided form or create a new one
if extract_form is None:
extract_form = forms.extractDataForm(
formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
# Get error information for the template
screenshot_url = watch.get_screenshot()
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
# Use the shared default template from processors/templates/
# Processors can override this by creating their own extract.py with custom template logic
output = render_template(
"extract.html",
uuid=uuid,
extract_form=extract_form,
watch_a=watch,
last_error=watch['last_error'],
last_error_screenshot=watch.get_error_snapshot(),
last_error_text=watch.get_error_text(),
screenshot=screenshot_url,
is_html_webdriver=is_html_webdriver,
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
extra_title=f" - {watch.label} - Extract Data",
extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
pure_menu_fixed=False
)
return output
def process_extraction(watch, datastore, request, url_for, make_response, send_from_directory, flash, redirect, extract_form=None):
"""
Process the data extraction request and return CSV file.
Args:
watch: The watch object
datastore: The ChangeDetectionStore instance
request: Flask request object
url_for: Flask url_for function
make_response: Flask make_response function
send_from_directory: Flask send_from_directory function
flash: Flask flash function
redirect: Flask redirect function
extract_form: Optional pre-built extract form
Returns:
CSV file download response or redirect to form on error
"""
from changedetectionio import forms
uuid = watch.get('uuid')
# Use provided form or create a new one
if extract_form is None:
extract_form = forms.extractDataForm(
formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
if not extract_form.validate():
flash("An error occurred, please see below.", "error")
# render_template needs to be imported from Flask for this to work
from flask import render_template as flask_render_template
return render_form(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=flask_render_template,
flash=flash,
redirect=redirect,
extract_form=extract_form
)
extract_regex = request.form.get('extract_regex', '').strip()
output = watch.extract_regex_from_all_history(extract_regex)
if output:
watch_dir = os.path.join(datastore.datastore_path, uuid)
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
response.headers['Content-type'] = 'text/csv'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = "0"
return response
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
return redirect(url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid))

View File

@@ -1,210 +0,0 @@
# Fast Screenshot Comparison Processor
Visual/screenshot change detection using ultra-fast image comparison algorithms.
## Overview
This processor uses **OpenCV** by default for screenshot comparison, providing **50-100x faster** performance compared to the previous SSIM implementation while still detecting meaningful visual changes.
## Current Features
- **Ultra-fast OpenCV comparison**: cv2.absdiff with Gaussian blur for noise reduction
- **MD5 pre-check**: Fast identical image detection before expensive comparison
- **Configurable sensitivity**: Threshold-based change detection
- **Three-panel diff view**: Previous | Current | Difference (with red highlights)
- **Direct image support**: Works with browser screenshots AND direct image URLs
- **Visual selector support**: Compare specific page regions using CSS/XPath selectors
- **Download images**: Download any of the three comparison images directly from the diff view
## Performance
- **OpenCV (default)**: 50-100x faster than SSIM
- **Large screenshots**: Automatic downscaling for diff visualization (configurable via `MAX_DIFF_HEIGHT`/`MAX_DIFF_WIDTH`)
- **Memory efficient**: Explicit cleanup of large objects for long-running processes
- **JPEG diff images**: Smaller file sizes, faster rendering
## How It Works
1. **Fetch**: Screenshot captured via browser OR direct image URL fetched
2. **MD5 Check**: Quick hash comparison - if identical, skip comparison
3. **Region Selection** (optional): Crop to specific page region if visual selector is configured
4. **OpenCV Comparison**: Fast pixel-level difference detection with Gaussian blur
5. **Change Detection**: Percentage of changed pixels above threshold = change detected
6. **Visualization**: Generate diff image with red-highlighted changed regions
## Architecture
### Default Method: OpenCV
The processor uses OpenCV's `cv2.absdiff()` for ultra-fast pixel-level comparison:
```python
# Convert to grayscale
gray_from = cv2.cvtColor(image_from, cv2.COLOR_RGB2GRAY)
gray_to = cv2.cvtColor(image_to, cv2.COLOR_RGB2GRAY)
# Apply Gaussian blur (reduces noise, controlled by OPENCV_BLUR_SIGMA env var)
gray_from = cv2.GaussianBlur(gray_from, (0, 0), sigma=0.8)
gray_to = cv2.GaussianBlur(gray_to, (0, 0), sigma=0.8)
# Calculate absolute difference
diff = cv2.absdiff(gray_from, gray_to)
# Apply threshold (default: 30)
_, thresh = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
# Count changed pixels
change_percentage = (changed_pixels / total_pixels) * 100
```
### Optional: Pixelmatch
For users who need better anti-aliasing detection (especially for text-heavy screenshots), **pixelmatch** can be optionally installed:
```bash
pip install pybind11-pixelmatch>=0.1.3
```
**Note**: Pixelmatch uses a C++17 implementation via pybind11 and may have build issues on some platforms (particularly Alpine/musl systems with symbolic link security restrictions). The application will automatically fall back to OpenCV if pixelmatch is not available.
To use pixelmatch instead of OpenCV, set the environment variable:
```bash
COMPARISON_METHOD=pixelmatch
```
#### When to use pixelmatch:
- Screenshots with lots of text and anti-aliasing
- Need to ignore minor font rendering differences between browser versions
- 10-20x faster than SSIM (but slower than OpenCV)
#### When to stick with OpenCV (default):
- General webpage monitoring
- Maximum performance (50-100x faster than SSIM)
- Simple pixel-level change detection
- Avoid build dependencies (Alpine/musl systems)
## Configuration
### Environment Variables
```bash
# Comparison method (opencv or pixelmatch)
COMPARISON_METHOD=opencv # Default
# OpenCV threshold (0-255, lower = more sensitive)
COMPARISON_THRESHOLD_OPENCV=30 # Default
# Pixelmatch threshold (0-100, mapped to 0-1 scale)
COMPARISON_THRESHOLD_PIXELMATCH=10 # Default
# Gaussian blur sigma for OpenCV (0 = no blur, higher = more blur)
OPENCV_BLUR_SIGMA=0.8 # Default
# Minimum change percentage to trigger detection
OPENCV_MIN_CHANGE_PERCENT=0.1 # Default (0.1%)
PIXELMATCH_MIN_CHANGE_PERCENT=0.1 # Default
# Diff visualization image size limits (pixels)
MAX_DIFF_HEIGHT=8000 # Default
MAX_DIFF_WIDTH=900 # Default
```
### Per-Watch Configuration
- **Comparison Threshold**: Can be configured per-watch in the edit form
- Very low sensitivity (10) - Only major changes
- Low sensitivity (20) - Significant changes
- Medium sensitivity (30) - Moderate changes (default)
- High sensitivity (50) - Small changes
- Very high sensitivity (75) - Any visible change
### Visual Selector (Region Comparison)
Use the "Include filters" field with CSS selectors or XPath to compare only specific page regions:
```
.content-area
//div[@id='main']
```
The processor will automatically crop both screenshots to the bounding box of the first matched element.
## Dependencies
### Required
- `opencv-python-headless>=4.8.0.76` - Fast image comparison
- `Pillow (PIL)` - Image loading and manipulation
- `numpy` - Array operations
### Optional
- `pybind11-pixelmatch>=0.1.3` - Alternative comparison method with anti-aliasing detection
## Change Detection Interpretation
- **0%** = Identical images (or below minimum change threshold)
- **0.1-1%** = Minor differences (anti-aliasing, slight rendering differences)
- **1-5%** = Noticeable changes (text updates, small content changes)
- **5-20%** = Significant changes (layout shifts, content additions)
- **>20%** = Major differences (page redesign, large content changes)
## Technical Notes
### Memory Management
```python
# Explicit cleanup for long-running processes
img.close() # Close PIL Images
buffer.close() # Close BytesIO buffers
del large_array # Mark numpy arrays for GC
```
### Diff Image Generation
- Format: JPEG (quality=85, optimized)
- Highlight: Red overlay (50% blend with original)
- Auto-downscaling: Large screenshots downscaled for faster rendering
- Base64 embedded: For direct template rendering
### OpenCV Blur Parameters
The Gaussian blur reduces sensitivity to:
- Font rendering differences
- Anti-aliasing variations
- JPEG compression artifacts
- Minor pixel shifts (1-2 pixels)
Increase `OPENCV_BLUR_SIGMA` to make comparison more tolerant of these differences.
## Comparison: OpenCV vs Pixelmatch vs SSIM
| Feature | OpenCV | Pixelmatch | SSIM (old) |
|---------|--------|------------|------------|
| **Speed** | 50-100x faster | 10-20x faster | Baseline |
| **Anti-aliasing** | Via blur | Built-in detection | Built-in |
| **Text sensitivity** | High | Medium (AA-aware) | Medium |
| **Dependencies** | opencv-python-headless | pybind11-pixelmatch + C++ compiler | scikit-image |
| **Alpine/musl support** | ✅ Yes | ⚠️ Build issues | ✅ Yes |
| **Memory usage** | Low | Low | High |
| **Best for** | General use, max speed | Text-heavy screenshots | Deprecated |
## Migration from SSIM
If you're upgrading from the old SSIM-based processor:
1. **Thresholds are different**: SSIM used 0-1 scale (higher = more similar), OpenCV uses 0-255 pixel difference (lower = more similar)
2. **Default threshold**: Start with 30 for OpenCV, adjust based on your needs
3. **Performance**: Expect dramatically faster comparisons, especially for large screenshots
4. **Accuracy**: OpenCV is more sensitive to pixel-level changes; increase `OPENCV_BLUR_SIGMA` if you're getting false positives
## Future Enhancements
Potential features for future consideration:
- **Change region detection**: Highlight specific areas that changed with bounding boxes
- **Perceptual hashing**: Pre-screening filter for even faster checks
- **Ignore regions**: Exclude specific page areas (ads, timestamps) from comparison
- **Text extraction**: OCR-based text comparison for semantic changes
- **Adaptive thresholds**: Different sensitivity for different page regions
## Resources
- [OpenCV Documentation](https://docs.opencv.org/)
- [pybind11-pixelmatch GitHub](https://github.com/whtsky/pybind11-pixelmatch)
- [Pixelmatch (original JS library)](https://github.com/mapbox/pixelmatch)

View File

@@ -1,17 +0,0 @@
"""
Visual/screenshot change detection using fast image comparison algorithms.
This processor compares screenshots using OpenCV (cv2.absdiff) or pixelmatch algorithms,
which are 10-100x faster than SSIM while still detecting meaningful visual changes
and handling antialiasing differences.
"""
import os
processor_description = "Visual/Screenshot change detection (Fast)"
processor_name = "image_ssim_diff"
# Default comparison settings
DEFAULT_COMPARISON_METHOD = os.getenv('COMPARISON_METHOD', 'opencv')
DEFAULT_COMPARISON_THRESHOLD_OPENCV = float(os.getenv('COMPARISON_THRESHOLD_OPENCV', '30'))
DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH = float(os.getenv('COMPARISON_THRESHOLD_PIXELMATCH', '10'))

View File

@@ -1,418 +0,0 @@
"""
Screenshot diff visualization for fast image comparison processor.
Generates side-by-side comparison with red-highlighted differences using
OpenCV or pixelmatch for fast rendering (10-100x faster than SSIM).
"""
import os
import json
import base64
import io
import time
from loguru import logger
from PIL import Image
import numpy as np
import cv2
from . import DEFAULT_COMPARISON_METHOD, DEFAULT_COMPARISON_THRESHOLD_OPENCV, DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH
# Maximum dimensions for diff visualization (can be overridden via environment variable)
# Large screenshots don't need full resolution for visual inspection
MAX_DIFF_HEIGHT = int(os.getenv('MAX_DIFF_HEIGHT', '8000'))
MAX_DIFF_WIDTH = int(os.getenv('MAX_DIFF_WIDTH', '900'))
def _resize_for_diff(img, max_height=MAX_DIFF_HEIGHT, max_width=MAX_DIFF_WIDTH):
"""
Downscale image if too large for faster diff visualization.
Users don't need pixel-perfect diffs at 20000px resolution.
Downscaling to 2000px is 100x faster and still shows all visible changes.
Args:
img: PIL Image
max_height: Maximum height in pixels
max_width: Maximum width in pixels
Returns:
PIL Image (resized if needed)
"""
if img.height > max_height or img.width > max_width:
# Calculate scaling factor to fit within max dimensions
height_ratio = max_height / img.height if img.height > max_height else 1.0
width_ratio = max_width / img.width if img.width > max_width else 1.0
ratio = min(height_ratio, width_ratio)
new_size = (int(img.width * ratio), int(img.height * ratio))
logger.debug(f"Downscaling diff visualization: {img.size} -> {new_size} ({ratio*100:.1f}% scale)")
return img.resize(new_size, Image.Resampling.LANCZOS)
return img
def calculate_diff_opencv(img_bytes_from, img_bytes_to, threshold=30):
"""
Calculate image difference using OpenCV cv2.absdiff.
This is the fastest method for diff visualization.
Args:
img_bytes_from: Previous screenshot (bytes)
img_bytes_to: Current screenshot (bytes)
threshold: Pixel difference threshold (0-255)
Returns:
tuple: (change_percentage, diff_mask) where diff_mask is a 2D numpy binary mask
"""
# Load images from BytesIO buffers
buf_from = io.BytesIO(img_bytes_from)
buf_to = io.BytesIO(img_bytes_to)
img_from = Image.open(buf_from)
img_to = Image.open(buf_to)
# Ensure images are the same size
if img_from.size != img_to.size:
img_from = img_from.resize(img_to.size, Image.Resampling.LANCZOS)
# Downscale large images for faster diff visualization
# A 20000px tall screenshot doesn't need full resolution for visual inspection
img_from = _resize_for_diff(img_from)
img_to = _resize_for_diff(img_to)
# Convert to numpy arrays
arr_from = np.array(img_from)
arr_to = np.array(img_to)
# Convert to grayscale
if len(arr_from.shape) == 3:
gray_from = cv2.cvtColor(arr_from, cv2.COLOR_RGB2GRAY)
gray_to = cv2.cvtColor(arr_to, cv2.COLOR_RGB2GRAY)
else:
gray_from = arr_from
gray_to = arr_to
# Optional Gaussian blur to reduce noise
blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
if blur_sigma > 0:
gray_from = cv2.GaussianBlur(gray_from, (0, 0), blur_sigma)
gray_to = cv2.GaussianBlur(gray_to, (0, 0), blur_sigma)
# Calculate absolute difference
diff = cv2.absdiff(gray_from, gray_to)
# Apply threshold to create binary mask
_, diff_mask = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
# Calculate change percentage
changed_pixels = np.count_nonzero(diff_mask)
total_pixels = diff_mask.size
change_percentage = (changed_pixels / total_pixels) * 100
# Explicit memory cleanup - close images and buffers, delete large arrays
img_from.close()
img_to.close()
buf_from.close()
buf_to.close()
del arr_from, arr_to, gray_from, gray_to, diff
return float(change_percentage), diff_mask
def calculate_diff_pixelmatch(img_bytes_from, img_bytes_to, threshold=0.1):
"""
Calculate image difference using pixelmatch.
Args:
img_bytes_from: Previous screenshot (bytes)
img_bytes_to: Current screenshot (bytes)
threshold: Color difference threshold (0-1)
Returns:
tuple: (change_percentage, diff_array) where diff_array is RGBA numpy array
"""
try:
from pybind11_pixelmatch import pixelmatch, Options
except ImportError:
logger.warning("pybind11-pixelmatch not installed, falling back to OpenCV")
return calculate_diff_opencv(img_bytes_from, img_bytes_to, threshold * 255)
# Load images from BytesIO buffers
buf_from = io.BytesIO(img_bytes_from)
buf_to = io.BytesIO(img_bytes_to)
img_from = Image.open(buf_from)
img_to = Image.open(buf_to)
# Ensure images are the same size
if img_from.size != img_to.size:
img_from = img_from.resize(img_to.size, Image.Resampling.LANCZOS)
# Downscale large images for faster diff visualization
img_from = _resize_for_diff(img_from)
img_to = _resize_for_diff(img_to)
# Convert to RGB
if img_from.mode != 'RGB':
img_from = img_from.convert('RGB')
if img_to.mode != 'RGB':
img_to = img_to.convert('RGB')
# Convert to numpy arrays
arr_from = np.array(img_from)
arr_to = np.array(img_to)
# Add alpha channel (pixelmatch expects RGBA)
if arr_from.shape[2] == 3:
alpha = np.ones((arr_from.shape[0], arr_from.shape[1], 1), dtype=np.uint8) * 255
arr_from = np.concatenate([arr_from, alpha], axis=2)
arr_to = np.concatenate([arr_to, alpha], axis=2)
# Create diff output array
diff_array = np.zeros_like(arr_from)
# Configure pixelmatch options
opts = Options()
opts.threshold = threshold
opts.includeAA = True # Detect anti-aliasing
opts.alpha = 0.1 # Opacity of diff overlay
# Run pixelmatch
width, height = img_from.size
num_diff_pixels = pixelmatch(
arr_from,
arr_to,
output=diff_array,
options=opts
)
# Calculate change percentage
total_pixels = width * height
change_percentage = (num_diff_pixels / total_pixels) * 100
# Explicit memory cleanup - close images and buffers, delete large arrays
img_from.close()
img_to.close()
buf_from.close()
buf_to.close()
del arr_from, arr_to
if 'alpha' in locals():
del alpha
return float(change_percentage), diff_array
def generate_diff_image_opencv(img_bytes_to, diff_mask):
"""
Generate a difference image with red highlights using OpenCV.
This is the fastest method for generating diff visualization.
Args:
img_bytes_to: Current screenshot (bytes)
diff_mask: Binary mask of changed pixels (2D numpy array)
Returns:
bytes: PNG image with red highlights on changed pixels
"""
# Load current image as base from BytesIO buffer
buf_to = io.BytesIO(img_bytes_to)
img_to = Image.open(buf_to)
# Downscale for faster diff visualization
img_to = _resize_for_diff(img_to)
# Convert to RGB
if img_to.mode != 'RGB':
img_to = img_to.convert('RGB')
result_array = np.array(img_to)
# Ensure mask is same size as image
if diff_mask.shape != result_array.shape[:2]:
diff_mask = cv2.resize(diff_mask, (result_array.shape[1], result_array.shape[0]))
# Create boolean mask
changed_mask = diff_mask > 0
# Apply red highlight where mask is True (50% blend)
result_array[changed_mask] = (
result_array[changed_mask] * 0.5 + np.array([255, 0, 0]) * 0.5
).astype(np.uint8)
# Convert back to PIL Image
diff_img = Image.fromarray(result_array.astype(np.uint8))
# Save to bytes as JPEG (smaller and faster than PNG for diff visualization)
buf = io.BytesIO()
diff_img.save(buf, format='JPEG', quality=85, optimize=True)
diff_bytes = buf.getvalue()
# Explicit memory cleanup - close files and buffers, delete large objects
buf.close()
buf_to.close()
diff_img.close()
img_to.close()
del result_array, changed_mask, diff_mask
return diff_bytes
def generate_diff_image_pixelmatch(diff_array):
"""
Generate a difference image from pixelmatch diff array.
Args:
diff_array: RGBA diff array from pixelmatch (4D numpy array)
Returns:
bytes: JPEG image with highlighted differences
"""
# Convert diff array to PIL Image (RGBA)
diff_img = Image.fromarray(diff_array.astype(np.uint8), mode='RGBA')
# Convert RGBA to RGB for JPEG (JPEG doesn't support transparency)
diff_img = diff_img.convert('RGB')
# Save to bytes as JPEG (smaller and faster than PNG)
buf = io.BytesIO()
diff_img.save(buf, format='JPEG', quality=85, optimize=True)
diff_bytes = buf.getvalue()
# Explicit memory cleanup - close files first, then delete
buf.close()
diff_img.close()
return diff_bytes
def render(watch, datastore, request, url_for, render_template, flash, redirect):
"""
Render the screenshot comparison diff page.
Args:
watch: Watch object
datastore: Datastore object
request: Flask request
url_for: Flask url_for function
render_template: Flask render_template function
flash: Flask flash function
redirect: Flask redirect function
Returns:
Rendered template or redirect
"""
# Get version parameters (from_version, to_version)
versions = list(watch.history.keys())
if len(versions) < 2:
flash("Not enough history to compare. Need at least 2 snapshots.", "error")
return redirect(url_for('watchlist.index'))
# Default: compare latest two versions
from_version = request.args.get('from_version', versions[-2] if len(versions) >= 2 else versions[0])
to_version = request.args.get('to_version', versions[-1])
# Validate versions exist
if from_version not in versions:
from_version = versions[-2] if len(versions) >= 2 else versions[0]
if to_version not in versions:
to_version = versions[-1]
# Use hardcoded comparison method (can be overridden via COMPARISON_METHOD env var)
comparison_method = DEFAULT_COMPARISON_METHOD
logger.debug(f"Using comparison_method {comparison_method}")
# Get threshold (per-watch > global > env default)
threshold = watch.get('comparison_threshold')
if not threshold or threshold == '':
default_threshold = (
DEFAULT_COMPARISON_THRESHOLD_OPENCV if comparison_method == 'opencv'
else DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH
)
threshold = datastore.data['settings']['application'].get('comparison_threshold', default_threshold)
# Convert threshold to appropriate type
try:
threshold = float(threshold)
# For pixelmatch, convert from 0-100 scale to 0-1 scale
if comparison_method == 'pixelmatch':
threshold = threshold / 100.0
except (ValueError, TypeError):
logger.warning(f"Invalid threshold value '{threshold}', using default")
threshold = 30.0 if comparison_method == 'opencv' else 0.1
# Load screenshots from history
try:
img_bytes_from = watch.get_history_snapshot(timestamp=from_version)
img_bytes_to = watch.get_history_snapshot(timestamp=to_version)
# Convert to bytes if needed (should already be bytes for screenshots)
if isinstance(img_bytes_from, str):
img_bytes_from = img_bytes_from.encode('utf-8')
if isinstance(img_bytes_to, str):
img_bytes_to = img_bytes_to.encode('utf-8')
except Exception as e:
logger.error(f"Failed to load screenshots: {e}")
flash(f"Failed to load screenshots: {e}", "error")
return redirect(url_for('watchlist.index'))
# Calculate diff and generate difference image based on method
now = time.time()
try:
if comparison_method == 'pixelmatch':
change_percentage, diff_data = calculate_diff_pixelmatch(img_bytes_from, img_bytes_to, threshold)
diff_image_bytes = generate_diff_image_pixelmatch(diff_data)
method_display = f"Pixelmatch (threshold: {threshold*100:.0f}%)"
del diff_data # Clean up diff array
else: # opencv
change_percentage, diff_mask = calculate_diff_opencv(img_bytes_from, img_bytes_to, threshold)
diff_image_bytes = generate_diff_image_opencv(img_bytes_to, diff_mask)
method_display = f"OpenCV (threshold: {threshold:.0f})"
del diff_mask # Clean up diff mask
except Exception as e:
logger.error(f"Failed to generate diff: {e}")
flash(f"Failed to generate diff: {e}", "error")
return redirect(url_for('watchlist.index'))
finally:
logger.debug(f"Done '{comparison_method}' in {time.time() - now:.2f}s")
# Convert images to base64 for embedding in template
img_from_b64 = base64.b64encode(img_bytes_from).decode('utf-8')
img_to_b64 = base64.b64encode(img_bytes_to).decode('utf-8')
diff_image_b64 = base64.b64encode(diff_image_bytes).decode('utf-8')
# Clean up large byte objects after base64 encoding
del img_bytes_from
del img_bytes_to
del diff_image_bytes
# Load historical data if available (for charts/visualization)
comparison_data = {}
comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
if os.path.isfile(comparison_config_path):
try:
with open(comparison_config_path, 'r') as f:
comparison_data = json.load(f)
except Exception as e:
logger.warning(f"Failed to load comparison history data: {e}")
# Render custom template
# Template path is namespaced to avoid conflicts with other processors
return render_template(
'image_ssim_diff/diff.html',
watch=watch,
uuid=watch.get('uuid'),
img_from_b64=img_from_b64,
img_to_b64=img_to_b64,
diff_image_b64=diff_image_b64,
change_percentage=change_percentage,
comparison_data=comparison_data, # Full history for charts/visualization
threshold=threshold,
comparison_method=method_display,
versions=versions,
from_version=from_version,
to_version=to_version,
percentage_different=change_percentage
)

View File

@@ -1,45 +0,0 @@
"""
Configuration forms for fast screenshot comparison processor.
"""
from wtforms import SelectField, validators
from changedetectionio.forms import processor_text_json_diff_form
class processor_settings_form(processor_text_json_diff_form):
"""Form for fast image comparison processor settings."""
comparison_threshold = SelectField(
'Screenshot Comparison Sensitivity',
choices=[
('', 'Use global default'),
('10', 'Very low sensitivity (only major changes)'),
('20', 'Low sensitivity (significant changes)'),
('30', 'Medium sensitivity (moderate changes)'),
('50', 'High sensitivity (small changes)'),
('75', 'Very high sensitivity (any visible change)')
],
validators=[validators.Optional()],
default=''
)
def extra_tab_content(self):
"""Tab label for processor-specific settings."""
return 'Screenshot Comparison'
def extra_form_content(self):
"""Render processor-specific form fields."""
return '''
{% from '_helpers.html' import render_field %}
<fieldset>
<legend>Screenshot Comparison Settings</legend>
<div class="pure-control-group">
{{ render_field(form.comparison_threshold) }}
<span class="pure-form-message-inline">
Controls how sensitive the screenshot comparison is to visual changes.<br>
<strong>Higher sensitivity</strong> = detects smaller changes but may trigger on minor rendering differences.<br>
Select "Use global default" to inherit the system-wide setting.
</span>
</div>
</fieldset>
'''

View File

@@ -1,78 +0,0 @@
"""
Preview rendering for SSIM screenshot processor.
Renders images properly in the browser instead of showing raw bytes.
"""
import base64
from loguru import logger
def render(watch, datastore, request, url_for, render_template, flash, redirect):
"""
Render the preview page for screenshot watches.
Args:
watch: Watch object
datastore: Datastore object
request: Flask request
url_for: Flask url_for function
render_template: Flask render_template function
flash: Flask flash function
redirect: Flask redirect function
Returns:
Rendered template or redirect
"""
versions = list(watch.history.keys())
if len(versions) == 0:
flash("Preview unavailable - No snapshots captured yet", "error")
return redirect(url_for('watchlist.index'))
# Get the version to display (default: latest)
preferred_version = request.args.get('version')
timestamp = versions[-1]
if preferred_version and preferred_version in versions:
timestamp = preferred_version
# Load screenshot from history
try:
screenshot_bytes = watch.get_history_snapshot(timestamp=timestamp)
# Convert to bytes if needed (should already be bytes for screenshots)
if isinstance(screenshot_bytes, str):
screenshot_bytes = screenshot_bytes.encode('utf-8')
# Detect image format
if screenshot_bytes[:8] == b'\x89PNG\r\n\x1a\n':
mime_type = 'image/png'
elif screenshot_bytes[:3] == b'\xff\xd8\xff':
mime_type = 'image/jpeg'
elif screenshot_bytes[:6] in (b'GIF87a', b'GIF89a'):
mime_type = 'image/gif'
elif screenshot_bytes[:4] == b'RIFF' and screenshot_bytes[8:12] == b'WEBP':
mime_type = 'image/webp'
else:
mime_type = 'image/png' # Default fallback
# Convert to base64 for embedding in HTML
img_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')
except Exception as e:
logger.error(f"Failed to load screenshot: {e}")
flash(f"Failed to load screenshot: {e}", "error")
return redirect(url_for('watchlist.index'))
# Render custom template for image preview
# Template path is namespaced to avoid conflicts with other processors
return render_template(
'image_ssim_diff/preview.html',
watch=watch,
uuid=watch.get('uuid'),
img_b64=img_b64,
mime_type=mime_type,
versions=versions,
timestamp=timestamp,
current_diff_url=watch['url']
)

View File

@@ -1,377 +0,0 @@
"""
Core fast screenshot comparison processor.
This processor uses OpenCV or pixelmatch algorithms to detect visual changes
in screenshots. Both methods are dramatically faster than SSIM (10-100x speedup)
while still being effective at detecting meaningful changes.
"""
import hashlib
import os
import time
from loguru import logger
from changedetectionio.processors import difference_detection_processor
from changedetectionio.processors.exceptions import ProcessorException
from . import DEFAULT_COMPARISON_METHOD, DEFAULT_COMPARISON_THRESHOLD_OPENCV, DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH
name = 'Visual/Screenshot change detection (Fast)'
description = 'Compares screenshots using fast algorithms (OpenCV or pixelmatch), 10-100x faster than SSIM'
class perform_site_check(difference_detection_processor):
"""Fast screenshot comparison processor."""
def run_changedetection(self, watch):
"""
Perform screenshot comparison using OpenCV or pixelmatch.
Returns:
tuple: (changed_detected, update_obj, screenshot_bytes)
"""
now = time.time()
# Get the current screenshot
if not self.fetcher.screenshot:
raise ProcessorException(
message="No screenshot available. Ensure the watch is configured to use a real browser.",
url=watch.get('url')
)
self.screenshot = self.fetcher.screenshot
self.xpath_data = self.fetcher.xpath_data
# Quick MD5 check - skip expensive comparison if images are identical
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
current_md5 = hashlib.md5(self.screenshot).hexdigest()
previous_md5 = watch.get('previous_md5')
if previous_md5 and current_md5 == previous_md5:
logger.debug(f"Screenshot MD5 unchanged ({current_md5}), skipping comparison")
raise checksumFromPreviousCheckWasTheSame()
from PIL import Image
import io
import numpy as np
# Use hardcoded comparison method (can be overridden via COMPARISON_METHOD env var)
comparison_method = DEFAULT_COMPARISON_METHOD
# Get threshold (per-watch > global > env default)
threshold = watch.get('comparison_threshold')
if not threshold or threshold == '':
default_threshold = (
DEFAULT_COMPARISON_THRESHOLD_OPENCV if comparison_method == 'opencv'
else DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH
)
threshold = self.datastore.data['settings']['application'].get('comparison_threshold', default_threshold)
# Convert string to appropriate type
try:
threshold = float(threshold)
# For pixelmatch, convert from 0-100 scale to 0-1 scale
if comparison_method == 'pixelmatch':
threshold = threshold / 100.0
except (ValueError, TypeError):
logger.warning(f"Invalid threshold value '{threshold}', using default")
threshold = 30.0 if comparison_method == 'opencv' else 0.1
# Convert current screenshot to PIL Image
try:
current_img = Image.open(io.BytesIO(self.screenshot))
except Exception as e:
raise ProcessorException(
message=f"Failed to load current screenshot: {e}",
url=watch.get('url')
)
# Check if visual selector (include_filters) is set for region-based comparison
include_filters = watch.get('include_filters', [])
crop_region = None
if include_filters and len(include_filters) > 0:
# Get the first filter to use for cropping
first_filter = include_filters[0].strip()
if first_filter and self.xpath_data:
try:
import json
# xpath_data is JSON string from browser
xpath_data_obj = json.loads(self.xpath_data) if isinstance(self.xpath_data, str) else self.xpath_data
# Find the bounding box for the first filter
for element in xpath_data_obj.get('size_pos', []):
# Match the filter with the element's xpath
if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'):
# Found the element - extract crop coordinates
left = element.get('left', 0)
top = element.get('top', 0)
width = element.get('width', 0)
height = element.get('height', 0)
# PIL crop uses (left, top, right, bottom)
crop_region = (
max(0, left),
max(0, top),
min(current_img.width, left + width),
min(current_img.height, top + height)
)
logger.info(f"Visual selector enabled: cropping to region {crop_region} for filter: {first_filter}")
break
except Exception as e:
logger.warning(f"Failed to parse xpath_data for visual selector: {e}")
# Crop the current image if region was found
if crop_region:
try:
current_img = current_img.crop(crop_region)
# Update self.screenshot to the cropped version for history storage
crop_buffer = io.BytesIO()
current_img.save(crop_buffer, format='PNG')
self.screenshot = crop_buffer.getvalue()
logger.debug(f"Cropped screenshot to {current_img.size} (region: {crop_region})")
except Exception as e:
logger.error(f"Failed to crop screenshot: {e}")
crop_region = None # Disable cropping on error
# Check if this is the first check (no previous history)
history_keys = list(watch.history.keys())
if len(history_keys) == 0:
# First check - save baseline, no comparison
logger.info(f"First check for watch {watch.get('uuid')} - saving baseline screenshot")
# Close the PIL image before returning
current_img.close()
del current_img
update_obj = {
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
'last_error': False
}
logger.trace(f"Processed in {time.time() - now:.3f}s")
return False, update_obj, self.screenshot
# Get previous screenshot from history
try:
previous_timestamp = history_keys[-1]
previous_screenshot_bytes = watch.get_history_snapshot(timestamp=previous_timestamp)
# Screenshots are stored as PNG, so this should be bytes
if isinstance(previous_screenshot_bytes, str):
# If it's a string (shouldn't be for screenshots, but handle it)
previous_screenshot_bytes = previous_screenshot_bytes.encode('utf-8')
previous_img = Image.open(io.BytesIO(previous_screenshot_bytes))
# Crop previous image to the same region if visual selector is enabled
if crop_region:
try:
previous_img = previous_img.crop(crop_region)
logger.debug(f"Cropped previous screenshot to {previous_img.size}")
except Exception as e:
logger.warning(f"Failed to crop previous screenshot: {e}")
except Exception as e:
logger.warning(f"Failed to load previous screenshot for comparison: {e}")
# Clean up current image before returning
if 'current_img' in locals():
current_img.close()
del current_img
# If we can't load previous, treat as first check
update_obj = {
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
'last_error': False
}
logger.trace(f"Processed in {time.time() - now:.3f}s")
return False, update_obj, self.screenshot
# Perform comparison based on selected method
try:
# Ensure images are the same size
if current_img.size != previous_img.size:
logger.info(f"Resizing images to match: {previous_img.size} -> {current_img.size}")
previous_img = previous_img.resize(current_img.size, Image.Resampling.LANCZOS)
if comparison_method == 'pixelmatch':
changed_detected, change_score = self._compare_pixelmatch(
previous_img, current_img, threshold
)
logger.info(f"Pixelmatch: {change_score:.2f}% pixels different, threshold: {threshold*100:.0f}%")
else: # opencv (default)
changed_detected, change_score = self._compare_opencv(
previous_img, current_img, threshold
)
logger.info(f"OpenCV: {change_score:.2f}% pixels changed, threshold: {threshold:.0f}")
# Explicitly close PIL images to free memory immediately
current_img.close()
previous_img.close()
del current_img
del previous_img
del previous_screenshot_bytes # Release the large bytes object
except Exception as e:
logger.error(f"Failed to compare screenshots: {e}")
# Ensure cleanup even on error
for obj in ['current_img', 'previous_img']:
try:
locals()[obj].close()
except (KeyError, NameError, AttributeError):
pass
logger.trace(f"Processed in {time.time() - now:.3f}s")
raise ProcessorException(
message=f"Screenshot comparison failed: {e}",
url=watch.get('url')
)
# Return results
update_obj = {
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
'last_error': False
}
if changed_detected:
logger.info(f"Change detected using {comparison_method}! Score: {change_score:.2f}")
else:
logger.debug(f"No significant change using {comparison_method}. Score: {change_score:.2f}")
logger.trace(f"Processed in {time.time() - now:.3f}s")
return changed_detected, update_obj, self.screenshot
def _compare_opencv(self, img_from, img_to, threshold):
"""
Compare images using OpenCV cv2.absdiff method.
This is the fastest method (50-100x faster than SSIM) and works well
for detecting pixel-level changes with optional Gaussian blur to reduce
sensitivity to minor rendering differences.
Args:
img_from: Previous PIL Image
img_to: Current PIL Image
threshold: Pixel difference threshold (0-255)
Returns:
tuple: (changed_detected, change_percentage)
"""
import cv2
import numpy as np
# Convert PIL images to numpy arrays
arr_from = np.array(img_from)
arr_to = np.array(img_to)
# Convert to grayscale for faster comparison
if len(arr_from.shape) == 3:
gray_from = cv2.cvtColor(arr_from, cv2.COLOR_RGB2GRAY)
gray_to = cv2.cvtColor(arr_to, cv2.COLOR_RGB2GRAY)
else:
gray_from = arr_from
gray_to = arr_to
# Optional: Apply Gaussian blur to reduce sensitivity to minor rendering differences
# Controlled by environment variable, default sigma=0.8
blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
if blur_sigma > 0:
gray_from = cv2.GaussianBlur(gray_from, (0, 0), blur_sigma)
gray_to = cv2.GaussianBlur(gray_to, (0, 0), blur_sigma)
# Calculate absolute difference
diff = cv2.absdiff(gray_from, gray_to)
# Apply threshold to create binary mask
_, thresh = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
# Count changed pixels
changed_pixels = np.count_nonzero(thresh)
total_pixels = thresh.size
change_percentage = (changed_pixels / total_pixels) * 100
# Determine if change detected (if more than 0.1% of pixels changed)
# This prevents triggering on single-pixel noise
min_change_percentage = float(os.getenv("OPENCV_MIN_CHANGE_PERCENT", "0.1"))
changed_detected = change_percentage > min_change_percentage
# Explicit memory cleanup - mark large arrays for garbage collection
del arr_from, arr_to
del gray_from, gray_to
del diff, thresh
return changed_detected, change_percentage
def _compare_pixelmatch(self, img_from, img_to, threshold):
"""
Compare images using pixelmatch (C++17 implementation via pybind11).
This method is 10-20x faster than SSIM and is specifically designed for
screenshot comparison with anti-aliasing detection. It's particularly good
at ignoring minor rendering differences while catching real changes.
Args:
img_from: Previous PIL Image
img_to: Current PIL Image
threshold: Color difference threshold (0-1, where 0 is most sensitive)
Returns:
tuple: (changed_detected, change_percentage)
"""
try:
from pybind11_pixelmatch import pixelmatch, Options
except ImportError:
logger.error("pybind11-pixelmatch not installed, falling back to OpenCV")
return self._compare_opencv(img_from, img_to, threshold * 255)
import numpy as np
# Convert to RGB if not already
if img_from.mode != 'RGB':
img_from = img_from.convert('RGB')
if img_to.mode != 'RGB':
img_to = img_to.convert('RGB')
# Convert to numpy arrays (pixelmatch expects RGBA format)
arr_from = np.array(img_from)
arr_to = np.array(img_to)
# Add alpha channel (pixelmatch expects RGBA)
if arr_from.shape[2] == 3:
alpha = np.ones((arr_from.shape[0], arr_from.shape[1], 1), dtype=np.uint8) * 255
arr_from = np.concatenate([arr_from, alpha], axis=2)
arr_to = np.concatenate([arr_to, alpha], axis=2)
# Create diff output array (RGBA)
diff_array = np.zeros_like(arr_from)
# Configure pixelmatch options
opts = Options()
opts.threshold = threshold
opts.includeAA = True # Ignore anti-aliasing differences
opts.alpha = 0.1 # Opacity of diff overlay
# Run pixelmatch (returns number of mismatched pixels)
width, height = img_from.size
num_diff_pixels = pixelmatch(
arr_from,
arr_to,
output=diff_array,
options=opts
)
# Calculate change percentage
total_pixels = width * height
change_percentage = (num_diff_pixels / total_pixels) * 100
# Determine if change detected (if more than 0.1% of pixels changed)
min_change_percentage = float(os.getenv("PIXELMATCH_MIN_CHANGE_PERCENT", "0.1"))
changed_detected = change_percentage > min_change_percentage
# Explicit memory cleanup - mark large arrays for garbage collection
del arr_from, arr_to
del diff_array
if 'alpha' in locals():
del alpha
return changed_detected, change_percentage

View File

@@ -1,272 +0,0 @@
{% extends 'base.html' %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
{% block content %}
<style>
.comparison-score {
padding: 1em;
background: #f5f5f5;
border-radius: 4px;
margin: 1em 0;
border: 1px solid #ddd;
}
.change-detected {
color: #d32f2f;
font-weight: bold;
}
.no-change {
color: #388e3c;
font-weight: bold;
}
.comparison-grid {
display: grid;
grid-template-columns: 1fr 1fr 1fr;
gap: 1.5em;
margin: 2em 0;
}
.screenshot-panel {
text-align: center;
background: #fff;
border: 1px solid #ddd;
border-radius: 4px;
padding: 1em;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
.screenshot-panel h3 {
margin: 0 0 1em 0;
font-size: 1.1em;
color: #333;
border-bottom: 2px solid #0078e7;
padding-bottom: 0.5em;
}
.screenshot-panel.diff h3 {
border-bottom-color: #d32f2f;
}
.screenshot-panel img {
max-width: 100%;
height: auto;
border: 1px solid #ddd;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.version-selector {
display: inline-block;
margin: 0 0.5em;
}
.version-selector label {
font-weight: bold;
margin-right: 0.5em;
}
@media (max-width: 1200px) {
.comparison-grid {
grid-template-columns: 1fr;
}
}
#settings {
background: #fff;
padding: 1.5em;
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
margin-bottom: 2em;
}
.diff-fieldset {
border: none;
padding: 0;
margin: 0;
}
.edit-link {
float: right;
margin-top: -0.5em;
}
</style>
<div id="settings">
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=uuid, next='diff') }}" class="pure-button button-small edit-link">
<img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread_the_word.svg')}}" alt="Edit" />
Edit Watch
</a>
<form class="pure-form" action="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid) }}" method="GET" id="diff-form">
<fieldset class="diff-fieldset">
<h2 style="margin-top: 0;">Screenshot Comparison (Fast)</h2>
{% if versions|length >= 2 %}
<div class="version-selector">
<label for="diff-from-version">From:</label>
<select id="diff-from-version" name="from_version" class="needs-localtime" onchange="this.form.submit()">
{%- for version in versions|reverse -%}
<option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
{{ version }}
</option>
{%- endfor -%}
</select>
</div>
<div class="version-selector">
<label for="diff-to-version">To:</label>
<select id="diff-to-version" name="to_version" class="needs-localtime" onchange="this.form.submit()">
{%- for version in versions|reverse -%}
<option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
{{ version }}
</option>
{%- endfor -%}
</select>
</div>
{% endif %}
<!-- Comparison score display -->
<div class="comparison-score">
<strong>Change Detection:</strong> {{ "%.2f"|format(change_percentage) }}% of pixels changed
<br>
{% if change_percentage > 0.1 %}
<span class="change-detected">⚠ Change Detected</span>
{% else %}
<span class="no-change">✓ No Significant Change</span>
{% endif %}
</div>
</fieldset>
</form>
</div>
<div id="screenshot-comparison">
<!-- Three-panel layout -->
<div class="comparison-grid">
<!-- Panel 1: From (Previous) -->
<div class="screenshot-panel">
<h3>Previous Snapshot</h3>
<div style="color: #666; font-size: 0.9em; margin-bottom: 1em;">
{{ from_version|format_timestamp_timeago }}
</div>
<div style="text-align: center; margin-bottom: 0.5em;">
<a href="#" onclick="downloadImage('img-from', '{{ from_version }}'); return false;" style="color: #0078e7; text-decoration: none; display: inline-flex; align-items: center; gap: 0.3em; font-size: 0.85em;" title="Download previous snapshot">
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
<path d="M8 12L3 7h3V1h4v6h3z"/>
<path d="M1 14h14v2H1z"/>
</svg>
Download
</a>
</div>
<img id="img-from" src="data:image/png;base64,{{ img_from_b64 }}" alt="Previous screenshot">
</div>
<!-- Panel 2: To (Current) -->
<div class="screenshot-panel">
<h3>Current Snapshot</h3>
<div style="color: #666; font-size: 0.9em; margin-bottom: 1em;">
{{ to_version|format_timestamp_timeago }}
</div>
<div style="text-align: center; margin-bottom: 0.5em;">
<a href="#" onclick="downloadImage('img-to', '{{ to_version }}'); return false;" style="color: #0078e7; text-decoration: none; display: inline-flex; align-items: center; gap: 0.3em; font-size: 0.85em;" title="Download current snapshot">
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
<path d="M8 12L3 7h3V1h4v6h3z"/>
<path d="M1 14h14v2H1z"/>
</svg>
Download
</a>
</div>
<img id="img-to" src="data:image/png;base64,{{ img_to_b64 }}" alt="Current screenshot">
</div>
<!-- Panel 3: Difference (with red highlights) -->
<div class="screenshot-panel diff">
<h3>Difference Visualization</h3>
<div style="color: #d32f2f; font-size: 0.9em; margin-bottom: 1em; font-weight: bold; display: flex; align-items: center; justify-content: center; gap: 1em;">
<a href="#" onclick="downloadImage('diff-image', '{{ to_version }}_diff'); return false;" style="color: #0078e7; text-decoration: none; display: flex; align-items: center; gap: 0.3em;" title="Download difference image">
<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
<path d="M8 12L3 7h3V1h4v6h3z"/>
<path d="M1 14h14v2H1z"/>
</svg>
Download
</a>
<span>Red = Changed Pixels</span>
</div>
<img id="diff-image" src="data:image/jpeg;base64,{{ diff_image_b64 }}" alt="Difference visualization with red highlights">
</div>
</div>
{% if comparison_data and comparison_data.get('history') and comparison_data.history|length > 1 %}
<div style="margin-top: 3em; padding: 1em; background: #fff; border-radius: 4px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
<h3>Comparison History</h3>
<p style="color: #666; font-size: 0.9em;">Recent comparison results (last {{ comparison_data.history|length }} checks)</p>
<div style="overflow-x: auto;">
<table class="pure-table pure-table-striped" style="width: 100%;">
<thead>
<tr>
<th>Timestamp</th>
<th>Change %</th>
<th>Method</th>
<th>Changed?</th>
</tr>
</thead>
<tbody>
{% for entry in comparison_data.history|reverse %}
<tr>
<td>{{ entry.timestamp|format_timestamp_timeago }}</td>
<td>{{ "%.2f"|format(entry.change_percentage) }}%</td>
<td>{{ entry.method }}</td>
<td>
{% if entry.changed %}
<span style="color: #d32f2f; font-weight: bold;">Yes</span>
{% else %}
<span style="color: #388e3c;">No</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endif %}
</div>
<script>
function downloadImage(imageId, filename) {
// Get the image element
const img = document.getElementById(imageId);
const base64Data = img.src;
// Convert base64 to blob
const byteString = atob(base64Data.split(',')[1]);
const mimeString = base64Data.split(',')[0].split(':')[1].split(';')[0];
const ab = new ArrayBuffer(byteString.length);
const ia = new Uint8Array(ab);
for (let i = 0; i < byteString.length; i++) {
ia[i] = byteString.charCodeAt(i);
}
const blob = new Blob([ab], { type: mimeString });
// Determine file extension from MIME type
const extension = mimeString.includes('jpeg') ? '.jpeg' : '.png';
// Create download link
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = filename + extension;
document.body.appendChild(a);
a.click();
// Cleanup
setTimeout(() => {
document.body.removeChild(a);
URL.revokeObjectURL(url);
}, 100);
}
</script>
{% endblock %}

View File

@@ -1,58 +0,0 @@
{% extends 'base.html' %}
{% block content %}
<div class="box">
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1em;">
<div style="flex: 1;">
<h2>Screenshot Preview - {{ watch.label or watch.url }}</h2>
<a href="{{ url_for('ui.form_watch_checknow', uuid=uuid) }}" class="pure-button button-small button-primary">Recheck</a>
<a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid) }}" class="pure-button button-small">View Diff History</a>
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=uuid) }}" class="pure-button button-small">Edit</a>
</div>
</div>
<div id="settings" style="margin-bottom: 1.5em; padding: 1em; background: #f5f5f5; border-radius: 4px;">
<form action="{{ url_for('ui.ui_preview.preview_page', uuid=uuid) }}">
<fieldset>
<legend>Preview Options</legend>
<div class="pure-control-group">
<label for="version">Version:</label>
<select name="version" id="version" onchange="this.form.submit()" class="pure-input-1-3">
{% for version_timestamp in versions|reverse %}
<option value="{{ version_timestamp }}" {% if version_timestamp == timestamp %}selected{% endif %}>
{{ version_timestamp|format_timestamp_timeago }}
</option>
{% endfor %}
</select>
</div>
<div class="pure-control-group">
<label for="url">Watching:</label>
<a href="{{ current_diff_url }}" target="_blank" rel="noopener" style="word-break: break-all;">{{ current_diff_url }}</a>
</div>
</fieldset>
</form>
</div>
<div id="screenshot-container" style="text-align: center; border: 1px solid #ddd; padding: 2em; background: #fafafa; border-radius: 4px;">
<h3 style="margin-top: 0;">Screenshot from {{ timestamp|format_timestamp_timeago }}</h3>
<img src="data:{{ mime_type }};base64,{{ img_b64 }}"
alt="Screenshot preview"
style="max-width: 100%; height: auto; border: 1px solid #ccc; box-shadow: 0 2px 8px rgba(0,0,0,0.1); border-radius: 2px;">
</div>
<div style="margin-top: 2em; padding: 1em; background: #f9f9f9; border-left: 4px solid #4CAF50;">
<h3 style="margin-top: 0;">About SSIM Screenshot Monitoring</h3>
<p>This watch uses <strong>Structural Similarity Index (SSIM)</strong> to detect visual changes in screenshots.</p>
<ul style="margin-left: 2em;">
<li><strong>Robust to antialiasing</strong> - Ignores minor rendering differences between browser runs</li>
<li><strong>Configurable sensitivity</strong> - Adjust threshold to catch major or minor changes</li>
<li><strong>Visual diff</strong> - Changed regions highlighted in red in diff view</li>
<li><strong>Works with direct images</strong> - Can monitor JPEG/PNG URLs without browser rendering</li>
</ul>
<p>
<a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid) }}" class="pure-button button-small">View Diff History</a>
to see what changed between versions.
</p>
</div>
</div>
{% endblock %}

View File

@@ -88,7 +88,7 @@ class guess_stream_type():
magic_content_header = mime
except Exception as e:
logger.warning(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")
logger.error(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")
# Content-based detection (most reliable for text formats)
# Check for HTML patterns first - if found, override magic's text/plain
@@ -103,15 +103,15 @@ class guess_stream_type():
self.is_json = True
elif 'pdf' in magic_content_header:
self.is_pdf = True
elif has_html_patterns or http_content_header == 'text/html':
self.is_html = True
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
self.is_json = True
# magic will call a rss document 'xml'
# Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
# This also triggers the automatic CDATA text parser so the RSS goes back a nice content list
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
self.is_rss = True
elif has_html_patterns or http_content_header == 'text/html':
self.is_html = True
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
self.is_json = True
elif any(s in http_content_header for s in XML_CONTENT_TYPES):
# Only mark as generic XML if not already detected as RSS
if not self.is_rss:

View File

@@ -6,8 +6,69 @@ import re
class Restock(dict):
def parse_currency(self, raw_value: str) -> Union[float, None]:
# Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
def _normalize_currency_code(self, currency: str, normalize_dollar=False) -> str:
"""
Normalize currency symbol or code to ISO 4217 code for consistency.
Uses iso4217parse for accurate conversion.
Returns empty string for ambiguous symbols like '$' where we can't determine
the specific currency (USD, CAD, AUD, etc.).
"""
if not currency:
return currency
# If already a 3-letter code, likely already normalized
if len(currency) == 3 and currency.isupper():
return currency
# Handle ambiguous dollar sign - can't determine which dollar currency
if normalize_dollar and currency == '$':
return ''
try:
import iso4217parse
# Parse the currency - returns list of possible matches
# This handles: € -> EUR, Kč -> CZK, £ -> GBP, ¥ -> JPY, etc.
currencies = iso4217parse.parse(currency)
if currencies:
# Return first match (iso4217parse handles the mapping)
return currencies[0].alpha3
except Exception:
pass
# Fallback: return as-is if can't normalize
return currency
def parse_currency(self, raw_value: str, normalize_dollar=False) -> Union[dict, None]:
"""
Parse price and currency from text, handling messy formats with extra text.
Returns dict with 'price' and 'currency' keys (ISO 4217 code), or None if parsing fails.
normalize_dollar convert $ to '' on sites that we cant tell what currency the site is in
"""
try:
from price_parser import Price
# price-parser handles:
# - Extra text before/after ("Beginning at", "tax incl.")
# - Various number formats (1 099,00 or 1,099.00)
# - Currency symbols and codes
price_obj = Price.fromstring(raw_value)
if price_obj.amount is not None:
result = {'price': float(price_obj.amount)}
if price_obj.currency:
# Normalize currency symbol to ISO 4217 code for consistency with metadata
normalized_currency = self._normalize_currency_code(currency=price_obj.currency, normalize_dollar=normalize_dollar)
result['currency'] = normalized_currency
return result
except Exception as e:
from loguru import logger
logger.trace(f"price-parser failed on '{raw_value}': {e}, falling back to manual parsing")
# Fallback to existing manual parsing logic
standardized_value = raw_value
if ',' in standardized_value and '.' in standardized_value:
@@ -24,7 +85,7 @@ class Restock(dict):
if standardized_value:
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
return {'price': float(parse_decimal(standardized_value, locale='en'))}
return None
@@ -51,7 +112,15 @@ class Restock(dict):
# Custom logic to handle setting price and original_price
if key == 'price' or key == 'original_price':
if isinstance(value, str):
value = self.parse_currency(raw_value=value)
parsed = self.parse_currency(raw_value=value)
if parsed:
# Set the price value
value = parsed.get('price')
# Also set currency if found and not already set
if parsed.get('currency') and not self.get('currency'):
super().__setitem__('currency', parsed.get('currency'))
else:
value = None
super().__setitem__(key, value)

View File

@@ -1,13 +1,13 @@
from wtforms import (
BooleanField,
validators,
FloatField
FloatField, StringField
)
from wtforms.fields.choices import RadioField
from wtforms.fields.form import FormField
from wtforms.form import Form
from changedetectionio.forms import processor_text_json_diff_form
from changedetectionio.forms import processor_text_json_diff_form, ValidateCSSJSONXPATHInput, StringListField
class RestockSettingsForm(Form):
@@ -27,6 +27,8 @@ class RestockSettingsForm(Form):
validators.NumberRange(min=0, max=100, message="Should be between 0 and 100"),
], render_kw={"placeholder": "0%", "size": "5"})
price_change_custom_include_filters = StringField('Override automatic price detection with this selector', [ValidateCSSJSONXPATHInput()], default='', render_kw={"style": "width: 100%;"})
follow_price_changes = BooleanField('Follow price changes', default=True)
class processor_settings_form(processor_text_json_diff_form):
@@ -74,7 +76,11 @@ class processor_settings_form(processor_text_json_diff_form):
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
</fieldset>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.restock_settings.price_change_custom_include_filters) }}
<span class="pure-form-message-inline">Override the automatic price metadata reader with this custom select from the <a href="#visualselector">Visual Selector</a>, in the case that the automatic detection was incorrect.</span><br>
</fieldset>
</div>
</fieldset>
"""

View File

@@ -6,6 +6,8 @@ from loguru import logger
import urllib3
import time
from ..text_json_diff.processor import FilterNotFoundInResponse
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Re-stock & Price detection for pages with a SINGLE product'
description = 'Detects if the product goes back to in-stock'
@@ -125,6 +127,85 @@ def get_itemprop_availability(html_content) -> Restock:
return value
def get_price_data_availability_from_filters(html_content, price_change_custom_include_filters) -> Restock:
"""
Extract price using custom CSS/XPath selectors.
Reuses apply_include_filters logic from text_json_diff processor.
Args:
html_content: The HTML content to parse
price_change_custom_include_filters: List of CSS/XPath selectors to extract price
Returns:
Restock dict with 'price' key if found
"""
from changedetectionio import html_tools
from changedetectionio.processors.magic import guess_stream_type
value = Restock()
if not price_change_custom_include_filters:
return value
# Get content type
stream_content_type = guess_stream_type(http_content_header='text/html', content=html_content)
# Apply filters to extract price element
filtered_content = ""
for filter_rule in price_change_custom_include_filters:
# XPath filters
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
filtered_content += html_tools.xpath_filter(
xpath_filter=filter_rule.replace('xpath:', ''),
html_content=html_content,
append_pretty_line_formatting=False,
is_rss=stream_content_type.is_rss
)
# XPath1 filters (first match only)
elif filter_rule.startswith('xpath1:'):
filtered_content += html_tools.xpath1_filter(
xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=html_content,
append_pretty_line_formatting=False,
is_rss=stream_content_type.is_rss
)
# CSS selectors, default fallback
else:
filtered_content += html_tools.include_filters(
include_filters=filter_rule,
html_content=html_content,
append_pretty_line_formatting=False
)
if filtered_content.strip():
# Convert HTML to text
import re
price_text = re.sub(
r'[\r\n\t]+', ' ',
html_tools.html_to_text(
html_content=filtered_content,
render_anchor_tag_content=False,
is_rss=False
).strip()
)
# Parse the price from text
try:
parsed_result = value.parse_currency(price_text, normalize_dollar=True)
if parsed_result:
value['price'] = parsed_result.get('price')
if parsed_result.get('currency'):
value['currency'] = parsed_result.get('currency')
logger.debug(f"Extracted price from custom selector: {parsed_result.get('price')} {parsed_result.get('currency', '')} (from text: '{price_text}')")
except Exception as e:
logger.warning(f"Failed to parse price from '{price_text}': {e}")
return value
def is_between(number, lower=None, upper=None):
"""
Check if a number is between two values.
@@ -185,47 +266,32 @@ class perform_site_check(difference_detection_processor):
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
break
# if not has custom selector..
itemprop_availability = {}
# Try built-in extraction first, this will scan metadata in the HTML
try:
itemprop_availability = get_itemprop_availability(self.fetcher.content)
except MoreThanOnePriceFound as e:
# Add the real data
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
url=watch.get('url'),
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
xpath_data=self.fetcher.xpath_data
)
# If built-in extraction didn't get both price AND availability, try plugin override
# Only check plugin if this watch is using a fetcher that might provide better data
has_price = itemprop_availability.get('price') is not None
has_availability = itemprop_availability.get('availability') is not None
# @TODO !!! some setting like "Use as fallback" or "always use", "t
if not (has_price and has_availability) or True:
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
fetcher_name = watch.get('fetch_backend', 'html_requests')
# Only try plugin override if not using system default (which might be anything)
if fetcher_name and fetcher_name != 'system':
logger.debug("Calling extra plugins for getting item price/availability")
plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)
if plugin_availability:
# Plugin provided better data, use it
plugin_has_price = plugin_availability.get('price') is not None
plugin_has_availability = plugin_availability.get('availability') is not None
# Only use plugin data if it's actually better than what we have
if plugin_has_price or plugin_has_availability:
itemprop_availability = plugin_availability
logger.info(f"Using plugin-provided availability data for fetcher '{fetcher_name}' (built-in had price={has_price}, availability={has_availability}; plugin has price={plugin_has_price}, availability={plugin_has_availability})")
if not plugin_availability:
logger.debug("No item price/availability from plugins")
if restock_settings.get('price_change_custom_include_filters'):
itemprop_availability = get_price_data_availability_from_filters(html_content=self.fetcher.content,
price_change_custom_include_filters=restock_settings.get(
'price_change_custom_include_filters')
)
if not itemprop_availability or not itemprop_availability.get('price'):
raise FilterNotFoundInResponse(
msg=restock_settings.get('price_change_custom_include_filters'),
screenshot=self.fetcher.screenshot,
xpath_data=self.fetcher.xpath_data
)
else:
try:
itemprop_availability = get_itemprop_availability(self.fetcher.content)
except MoreThanOnePriceFound as e:
# Add the real data
raise ProcessorException(
message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
url=watch.get('url'),
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
xpath_data=self.fetcher.xpath_data
)
# Something valid in get_itemprop_availability() by scraping metadata ?
if itemprop_availability.get('price') or itemprop_availability.get('availability'):

View File

@@ -1,47 +0,0 @@
{% extends 'base.html' %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
{% block content %}
<div class="tabs">
<ul>
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">Error Text</a></li> {% endif %}
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">Error Screenshot</a></li> {% endif %}
<li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">Text</a></li>
<li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">Screenshot</a></li>
<li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">Extract Data</a></li>
</ul>
</div>
<div id="diff-ui">
<div class="xxxxtab-pane-inner" id="extract">
<form id="extract-data-form" class="pure-form pure-form-stacked edit-form" action="{{ url_for('ui.ui_diff.diff_history_page_extract_POST', uuid=uuid) }}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<p>This tool will extract text data from all of the watch history.</p>
<div class="pure-control-group">
{{ render_field(extract_form.extract_regex) }}
<span class="pure-form-message-inline">
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>
<p>
For example, to extract only the numbers from text &dash;<br>
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
</p>
<p>
<a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
</p>
<p>
Each RegEx group bracket <code>()</code> will be in its own column, the first column value is always the date.
</p>
</span>
</div>
<div class="pure-control-group">
{{ render_button(extract_form.extract_submit_button) }}
</div>
</form>
</div>
</div>
{% endblock %}

View File

@@ -32,7 +32,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
from changedetectionio import forms, html_tools
from changedetectionio.model.Watch import model as watch_model
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ProcessPoolExecutor
from copy import deepcopy
from flask import request
import brotli
@@ -45,7 +45,6 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
text_before_filter = ''
trigger_line_numbers = []
ignore_line_numbers = []
blocked_line_numbers = []
tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
@@ -77,16 +76,13 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
# Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
# Do this as parallel threads (not processes) to avoid pickle issues with Lock objects
try:
with ThreadPoolExecutor(max_workers=2) as executor:
future1 = executor.submit(_task, tmp_watch, update_handler)
future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
# Do this as a parallel process because it could take some time
with ProcessPoolExecutor(max_workers=2) as executor:
future1 = executor.submit(_task, tmp_watch, update_handler)
future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
text_after_filter = future1.result()
text_before_filter = future2.result()
except Exception as e:
x=1
text_after_filter = future1.result()
text_before_filter = future2.result()
try:
trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
@@ -105,23 +101,14 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
except Exception as e:
text_before_filter = f"Error: {str(e)}"
try:
blocked_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
wordlist=tmp_watch.get('text_should_not_be_present', []) + datastore.data['settings']['application'].get('text_should_not_be_present', []),
mode='line numbers'
)
except Exception as e:
text_before_filter = f"Error: {str(e)}"
logger.trace(f"Parsed in {time.time() - now:.3f}s")
return ({
'after_filter': text_after_filter,
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
'blocked_line_numbers': blocked_line_numbers,
'duration': time.time() - now,
'ignore_line_numbers': ignore_line_numbers,
'trigger_line_numbers': trigger_line_numbers,
'after_filter': text_after_filter,
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
'duration': time.time() - now,
'trigger_line_numbers': trigger_line_numbers,
'ignore_line_numbers': ignore_line_numbers,
})

View File

@@ -1,232 +0,0 @@
"""
History/diff rendering for text_json_diff processor.
This module handles the visualization of text/HTML/JSON changes by rendering
a side-by-side or unified diff view with syntax highlighting and change markers.
"""
import os
import time
from loguru import logger
from changedetectionio import diff, strtobool
from changedetectionio.diff import (
REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
)
from changedetectionio.notification.handler import apply_html_color_to_body
def build_diff_cell_visualizer(content, resolution=100):
"""
Build a visual cell grid for the diff visualizer.
Analyzes the content for placemarkers indicating changes and creates a
grid of cells representing the document, with each cell marked as:
- 'deletion' for removed content
- 'insertion' for added content
- 'mixed' for cells containing both deletions and insertions
- empty string for cells with no changes
Args:
content: The diff content with placemarkers
resolution: Number of cells to create (default 100)
Returns:
List of dicts with 'class' key for each cell's CSS class
"""
if not content:
return [{'class': ''} for _ in range(resolution)]
now = time.time()
# Work with character positions for better accuracy
content_length = len(content)
if content_length == 0:
return [{'class': ''} for _ in range(resolution)]
chars_per_cell = max(1, content_length / resolution)
# Track change type for each cell
cell_data = {}
# Placemarkers to detect
change_markers = {
REMOVED_PLACEMARKER_OPEN: 'deletion',
ADDED_PLACEMARKER_OPEN: 'insertion',
CHANGED_PLACEMARKER_OPEN: 'deletion',
CHANGED_INTO_PLACEMARKER_OPEN: 'insertion',
}
# Find all occurrences of each marker
for marker, change_type in change_markers.items():
pos = 0
while True:
pos = content.find(marker, pos)
if pos == -1:
break
# Calculate which cell this marker falls into
cell_index = min(int(pos / chars_per_cell), resolution - 1)
if cell_index not in cell_data:
cell_data[cell_index] = change_type
elif cell_data[cell_index] != change_type:
# Mixed changes in this cell
cell_data[cell_index] = 'mixed'
pos += len(marker)
# Build the cell list
cells = []
for i in range(resolution):
change_type = cell_data.get(i, '')
cells.append({'class': change_type})
logger.debug(f"Built diff cell visualizer: {len([c for c in cells if c['class']])} cells with changes out of {resolution} in {time.time() - now:.2f}s")
return cells
# Diff display preferences configuration - single source of truth
DIFF_PREFERENCES_CONFIG = {
'changesOnly': {'default': True, 'type': 'bool'},
'ignoreWhitespace': {'default': False, 'type': 'bool'},
'removed': {'default': True, 'type': 'bool'},
'added': {'default': True, 'type': 'bool'},
'replaced': {'default': True, 'type': 'bool'},
'type': {'default': 'diffLines', 'type': 'value'},
}
def render(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
"""
Render the history/diff view for text/JSON/HTML changes.
Args:
watch: The watch object
datastore: The ChangeDetectionStore instance
request: Flask request object
url_for: Flask url_for function
render_template: Flask render_template function
flash: Flask flash function
redirect: Flask redirect function
extract_form: Optional pre-built extract form (for error cases)
Returns:
Rendered HTML response
"""
from changedetectionio import forms
uuid = watch.get('uuid')
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
# Use provided form or create a new one
if extract_form is None:
extract_form = forms.extractDataForm(formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
history = watch.history
dates = list(history.keys())
# If a "from_version" was requested, then find it (or the closest one)
# Also set "from version" to be the closest version to the one that was last viewed.
best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
from_version = request.args.get('from_version', from_version_timestamp )
# Use the current one if nothing was specified
to_version = request.args.get('to_version', str(dates[-1]))
try:
to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
except Exception as e:
logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
try:
from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
except Exception as e:
logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
from_version_file_contents = f"Unable to read to-version {from_version}.\n"
screenshot_url = watch.get_screenshot()
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
datastore.set_last_viewed(uuid, time.time())
# Parse diff preferences from request using config as single source of truth
# Check if this is a user submission (any diff pref param exists in query string)
user_submitted = any(key in request.args for key in DIFF_PREFERENCES_CONFIG.keys())
diff_prefs = {}
for key, config in DIFF_PREFERENCES_CONFIG.items():
if user_submitted:
# User submitted form - missing checkboxes are explicitly OFF
if config['type'] == 'bool':
diff_prefs[key] = strtobool(request.args.get(key, 'off'))
else:
diff_prefs[key] = request.args.get(key, config['default'])
else:
# Initial load - use defaults from config
diff_prefs[key] = config['default']
content = diff.render_diff(previous_version_file_contents=from_version_file_contents,
newest_version_file_contents=to_version_file_contents,
include_replaced=diff_prefs['replaced'],
include_added=diff_prefs['added'],
include_removed=diff_prefs['removed'],
include_equal=diff_prefs['changesOnly'],
ignore_junk=diff_prefs['ignoreWhitespace'],
word_diff=diff_prefs['type'] == 'diffWords',
)
# Build cell grid visualizer before applying HTML color (so we can detect placemarkers)
diff_cell_grid = build_diff_cell_visualizer(content)
content = apply_html_color_to_body(n_body=content)
offscreen_content = render_template("diff-offscreen-options.html")
note = ''
if str(from_version) != str(dates[-2]) or str(to_version) != str(dates[-1]):
note = 'Note: You are not viewing the latest changes.'
output = render_template("diff.html",
#initial_scroll_line_number=100,
bottom_horizontal_offscreen_contents=offscreen_content,
content=content,
current_diff_url=watch['url'],
diff_cell_grid=diff_cell_grid,
diff_prefs=diff_prefs,
extra_classes='difference-page',
extra_stylesheets=extra_stylesheets,
extra_title=f" - {watch.label} - History",
extract_form=extract_form,
from_version=str(from_version),
is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_screenshot=watch.get_error_snapshot(),
last_error_text=watch.get_error_text(),
newest=to_version_file_contents,
newest_version_timestamp=dates[-1],
note=note,
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
pure_menu_fixed=False,
screenshot=screenshot_url,
to_version=str(to_version),
uuid=uuid,
versions=dates, # All except current/last
watch_a=watch,
)
return output

View File

@@ -10,7 +10,7 @@ from changedetectionio.conditions import execute_ruleset_against_all_plugins
from changedetectionio.processors import difference_detection_processor
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
from changedetectionio import html_tools, content_fetchers
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from loguru import logger
from changedetectionio.processors.magic import guess_stream_type
@@ -279,7 +279,7 @@ class ContentProcessor:
# Sort JSON to avoid false alerts from reordering
try:
content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False)
content = json.dumps(json.loads(content), sort_keys=True, indent=4)
except Exception:
# Might be malformed JSON, continue anyway
pass
@@ -297,7 +297,7 @@ class ContentProcessor:
xpath_filter=filter_rule.replace('xpath:', ''),
html_content=content,
append_pretty_line_formatting=not self.watch.is_source_type_url,
is_xml=stream_content_type.is_rss or stream_content_type.is_xml
is_rss=stream_content_type.is_rss
)
# XPath1 filters (first match only)
@@ -306,7 +306,7 @@ class ContentProcessor:
xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=content,
append_pretty_line_formatting=not self.watch.is_source_type_url,
is_xml=stream_content_type.is_rss or stream_content_type.is_xml
is_rss=stream_content_type.is_rss
)
# JSON filters
@@ -324,13 +324,13 @@ class ContentProcessor:
append_pretty_line_formatting=not self.watch.is_source_type_url
)
# Raise error if filter returned nothing
if not filtered_content.strip():
raise FilterNotFoundInResponse(
msg=self.filter_config.include_filters,
screenshot=self.fetcher.screenshot,
xpath_data=self.fetcher.xpath_data
)
# Raise error if filter returned nothing
if not filtered_content.strip():
raise FilterNotFoundInResponse(
msg=self.filter_config.include_filters,
screenshot=self.fetcher.screenshot,
xpath_data=self.fetcher.xpath_data
)
return filtered_content
@@ -467,7 +467,6 @@ class perform_site_check(difference_detection_processor):
c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True)
return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8')
# === EMPTY PAGE CHECK ===
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0:
@@ -584,6 +583,7 @@ class perform_site_check(difference_detection_processor):
include_added=watch.get('filter_text_added', True),
include_removed=watch.get('filter_text_removed', True),
include_replaced=watch.get('filter_text_replaced', True),
line_feed_sep="\n",
include_change_type_prefix=False
)

View File

@@ -1,5 +1,5 @@
[pytest]
addopts = --no-start-live-server --live-server-port=0
addopts = --no-start-live-server --live-server-port=5005
#testpaths = tests pytest_invenio
#live_server_scope = function

View File

@@ -5,7 +5,7 @@ from blinker import signal
def register_watch_operation_handlers(socketio, datastore):
"""Register Socket.IO event handlers for watch operations"""
@socketio.on('watch_operation')
def handle_watch_operation(data):
"""Handle watch operations like pause, mute, recheck via Socket.IO"""

View File

@@ -32,30 +32,22 @@ class SignalHandler:
watch_favicon_bumped_signal = signal('watch_favicon_bump')
watch_favicon_bumped_signal.connect(self.handle_watch_bumped_favicon_signal, weak=False)
watch_small_status_comment_signal = signal('watch_small_status_comment')
watch_small_status_comment_signal.connect(self.handle_watch_small_status_update, weak=False)
# Connect to the notification_event signal
notification_event_signal = signal('notification_event')
notification_event_signal.connect(self.handle_notification_event, weak=False)
logger.info("SignalHandler: Connected to notification_event signal")
# Create and start the queue update thread using standard threading
import threading
self.polling_emitter_thread = threading.Thread(
target=self.polling_emit_running_or_queued_watches_threaded,
daemon=True
)
self.polling_emitter_thread.start()
logger.info("Started polling thread using threading (eventlet-free)")
def handle_watch_small_status_update(self, *args, **kwargs):
"""Small simple status update, for example 'Connecting...'"""
watch_uuid = kwargs.get('watch_uuid')
status = kwargs.get('status')
if watch_uuid and status:
logger.debug(f"Socket.IO: Received watch small status update '{status}' for UUID {watch_uuid}")
# Emit the status update to all connected clients
self.socketio_instance.emit("watch_small_status_comment", {
"uuid": watch_uuid,
"status": status,
"event_timestamp": time.time()
})
# Store the thread reference in socketio for clean shutdown
self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread
def handle_signal(self, *args, **kwargs):
logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
@@ -132,6 +124,74 @@ class SignalHandler:
except Exception as e:
logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")
def polling_emit_running_or_queued_watches_threaded(self):
"""Threading version of polling for Windows compatibility"""
import time
import threading
logger.info("Queue update thread started (threading mode)")
# Import here to avoid circular imports
from changedetectionio.flask_app import app
from changedetectionio import worker_handler
watch_check_update = signal('watch_check_update')
# Track previous state to avoid unnecessary emissions
previous_running_uuids = set()
# Run until app shutdown - check exit flag more frequently for fast shutdown
exit_event = getattr(app.config, 'exit', threading.Event())
while not exit_event.is_set():
try:
# Get current running UUIDs from async workers
running_uuids = set(worker_handler.get_running_uuids())
# Only send updates for UUIDs that changed state
newly_running = running_uuids - previous_running_uuids
no_longer_running = previous_running_uuids - running_uuids
# Send updates for newly running UUIDs (but exit fast if shutdown requested)
for uuid in newly_running:
if exit_event.is_set():
break
logger.trace(f"Threading polling: UUID {uuid} started processing")
with app.app_context():
watch_check_update.send(app_context=app, watch_uuid=uuid)
time.sleep(0.01) # Small yield
# Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
if not exit_event.is_set():
for uuid in no_longer_running:
if exit_event.is_set():
break
logger.trace(f"Threading polling: UUID {uuid} finished processing")
with app.app_context():
watch_check_update.send(app_context=app, watch_uuid=uuid)
time.sleep(0.01) # Small yield
# Update tracking for next iteration
previous_running_uuids = running_uuids
# Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown
for _ in range(20): # 20 * 0.5 = 10 seconds total
if exit_event.is_set():
break
time.sleep(0.5)
except Exception as e:
logger.error(f"Error in threading polling: {str(e)}")
# Even during error recovery, check for exit quickly
for _ in range(1): # 1 * 0.5 = 0.5 seconds
if exit_event.is_set():
break
time.sleep(0.5)
# Check if we're in pytest environment - if so, be more gentle with logging
import sys
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
if not in_pytest:
logger.info("Queue update thread stopped (threading mode)")
def handle_watch_update(socketio, **kwargs):
@@ -323,6 +383,19 @@ def init_socketio(app, datastore):
"""Shutdown the SocketIO server fast and aggressively"""
try:
logger.info("Socket.IO: Fast shutdown initiated...")
# For threading mode, give the thread a very short time to exit gracefully
if hasattr(socketio, 'polling_emitter_thread'):
if socketio.polling_emitter_thread.is_alive():
logger.info("Socket.IO: Waiting 1 second for polling thread to stop...")
socketio.polling_emitter_thread.join(timeout=1.0) # Only 1 second timeout
if socketio.polling_emitter_thread.is_alive():
logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown")
else:
logger.info("Socket.IO: Polling thread stopped quickly")
else:
logger.info("Socket.IO: Polling thread already stopped")
logger.info("Socket.IO: Fast shutdown complete")
except Exception as e:
logger.error(f"Socket.IO error during shutdown: {str(e)}")

View File

@@ -29,135 +29,16 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
return re.sub(pattern, repl, html_content)
# Jinja2 template for formatting RSS/Atom feed entries
# Covers all common feedparser entry fields including namespaced elements
# Outputs HTML that will be converted to text via html_to_text
# @todo - This could be a UI setting in the future
RSS_ENTRY_TEMPLATE = """<article class="rss-item" id="{{ entry.id|replace('"', '')|replace(' ', '-') }}">{%- if entry.title -%}Title: {{ entry.title }}<br>{%- endif -%}
{%- if entry.link -%}<strong>Link:</strong> <a href="{{ entry.link }}">{{ entry.link }}</a><br>
{%- endif -%}
{%- if entry.id -%}
<strong>Guid:</strong> {{ entry.id }}<br>
{%- endif -%}
{%- if entry.published -%}
<strong>PubDate:</strong> {{ entry.published }}<br>
{%- endif -%}
{%- if entry.updated and entry.updated != entry.published -%}
<strong>Updated:</strong> {{ entry.updated }}<br>
{%- endif -%}
{%- if entry.author -%}
<strong>Author:</strong> {{ entry.author }}<br>
{%- elif entry.author_detail and entry.author_detail.name -%}
<strong>Author:</strong> {{ entry.author_detail.name }}
{%- if entry.author_detail.email %} ({{ entry.author_detail.email }}){% endif -%}
<br>
{%- endif -%}
{%- if entry.contributors -%}
<strong>Contributors:</strong> {% for contributor in entry.contributors -%}
{{ contributor.name if contributor.name else contributor }}
{%- if not loop.last %}, {% endif -%}
{%- endfor %}<br>
{%- endif -%}
{%- if entry.publisher -%}
<strong>Publisher:</strong> {{ entry.publisher }}<br>
{%- endif -%}
{%- if entry.rights -%}
<strong>Rights:</strong> {{ entry.rights }}<br>
{%- endif -%}
{%- if entry.license -%}
<strong>License:</strong> {{ entry.license }}<br>
{%- endif -%}
{%- if entry.language -%}
<strong>Language:</strong> {{ entry.language }}<br>
{%- endif -%}
{%- if entry.tags -%}
<strong>Tags:</strong> {% for tag in entry.tags -%}
{{ tag.term if tag.term else tag }}
{%- if not loop.last %}, {% endif -%}
{%- endfor %}<br>
{%- endif -%}
{%- if entry.category -%}
<strong>Category:</strong> {{ entry.category }}<br>
{%- endif -%}
{%- if entry.comments -%}
<strong>Comments:</strong> <a href="{{ entry.comments }}">{{ entry.comments }}</a><br>
{%- endif -%}
{%- if entry.slash_comments -%}
<strong>Comment Count:</strong> {{ entry.slash_comments }}<br>
{%- endif -%}
{%- if entry.enclosures -%}
<strong>Enclosures:</strong><br>
{%- for enclosure in entry.enclosures %}
- <a href="{{ enclosure.href }}">{{ enclosure.href }}</a> ({{ enclosure.type if enclosure.type else 'unknown type' }}
{%- if enclosure.length %}, {{ enclosure.length }} bytes{% endif -%}
)<br>
{%- endfor -%}
{%- endif -%}
{%- if entry.media_content -%}
<strong>Media:</strong><br>
{%- for media in entry.media_content %}
- <a href="{{ media.url }}">{{ media.url }}</a>
{%- if media.type %} ({{ media.type }}){% endif -%}
{%- if media.width and media.height %} {{ media.width }}x{{ media.height }}{% endif -%}
<br>
{%- endfor -%}
{%- endif -%}
{%- if entry.media_thumbnail -%}
<strong>Thumbnail:</strong> <a href="{{ entry.media_thumbnail[0].url if entry.media_thumbnail[0].url else entry.media_thumbnail[0] }}">{{ entry.media_thumbnail[0].url if entry.media_thumbnail[0].url else entry.media_thumbnail[0] }}</a><br>
{%- endif -%}
{%- if entry.media_description -%}
<strong>Media Description:</strong> {{ entry.media_description }}<br>
{%- endif -%}
{%- if entry.itunes_duration -%}
<strong>Duration:</strong> {{ entry.itunes_duration }}<br>
{%- endif -%}
{%- if entry.itunes_author -%}
<strong>Podcast Author:</strong> {{ entry.itunes_author }}<br>
{%- endif -%}
{%- if entry.dc_identifier -%}
<strong>Identifier:</strong> {{ entry.dc_identifier }}<br>
{%- endif -%}
{%- if entry.dc_source -%}
<strong>DC Source:</strong> {{ entry.dc_source }}<br>
{%- endif -%}
{%- if entry.dc_type -%}
<strong>Type:</strong> {{ entry.dc_type }}<br>
{%- endif -%}
{%- if entry.dc_format -%}
<strong>Format:</strong> {{ entry.dc_format }}<br>
{%- endif -%}
{%- if entry.dc_relation -%}
<strong>Related:</strong> {{ entry.dc_relation }}<br>
{%- endif -%}
{%- if entry.dc_coverage -%}
<strong>Coverage:</strong> {{ entry.dc_coverage }}<br>
{%- endif -%}
{%- if entry.source and entry.source.title -%}
<strong>Source:</strong> {{ entry.source.title }}
{%- if entry.source.link %} (<a href="{{ entry.source.link }}">{{ entry.source.link }}</a>){% endif -%}
<br>
{%- endif -%}
{%- if entry.dc_content -%}
<strong>Content:</strong> {{ entry.dc_content | safe }}
{%- elif entry.content and entry.content[0].value -%}
<strong>Content:</strong> {{ entry.content[0].value | safe }}
{%- elif entry.summary -%}
<strong>Summary:</strong> {{ entry.summary | safe }}
{%- endif -%}</article>
"""
def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
"""
Format RSS/Atom feed items in a readable text format using feedparser and Jinja2.
Format RSS/Atom feed items in a readable text format using feedparser.
Converts RSS <item> or Atom <entry> elements to formatted text with all available fields:
- Basic fields: title, link, id/guid, published date, updated date
- Author fields: author, author_detail, contributors, publisher
- Content fields: content, summary, description
- Metadata: tags, category, rights, license
- Media: enclosures, media_content, media_thumbnail
- Dublin Core elements: dc:creator, dc:date, dc:publisher, etc. (mapped by feedparser)
Converts RSS <item> or Atom <entry> elements to formatted text with:
- <title> → <h1>Title</h1>
- <link> → Link: [url]
- <guid> → Guid: [id]
- <pubDate> → PubDate: [date]
- <description> or <content> → Raw HTML content (CDATA and entities automatically handled)
Args:
rss_content: The RSS/Atom feed content
@@ -168,19 +49,65 @@ def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
"""
try:
import feedparser
from changedetectionio.jinja2_custom import safe_jinja
from xml.sax.saxutils import escape as xml_escape
# Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc.
feed = feedparser.parse(rss_content)
# Determine feed type for appropriate labels
formatted_items = []
# Determine feed type for appropriate labels when fields are missing
# feedparser sets feed.version to things like 'rss20', 'atom10', etc.
is_atom = feed.version and 'atom' in feed.version
formatted_items = []
for entry in feed.entries:
# Render the entry using Jinja2 template
rendered = safe_jinja.render(RSS_ENTRY_TEMPLATE, entry=entry, is_atom=is_atom)
formatted_items.append(rendered.strip())
item_parts = []
# Title - feedparser handles CDATA and entity unescaping automatically
if hasattr(entry, 'title') and entry.title:
item_parts.append(f'<h1>{xml_escape(entry.title)}</h1>')
# Link
if hasattr(entry, 'link') and entry.link:
item_parts.append(f'Link: {xml_escape(entry.link)}<br>')
# GUID/ID
if hasattr(entry, 'id') and entry.id:
item_parts.append(f'Guid: {xml_escape(entry.id)}<br>')
# Date - feedparser normalizes all date field names to 'published'
if hasattr(entry, 'published') and entry.published:
item_parts.append(f'PubDate: {xml_escape(entry.published)}<br>')
# Description/Content - feedparser handles CDATA and entity unescaping automatically
# Only add "Summary:" label for Atom <summary> tags
content = None
add_label = False
if hasattr(entry, 'content') and entry.content:
# Atom <content> - no label, just content
content = entry.content[0].value if entry.content[0].value else None
elif hasattr(entry, 'summary'):
# Could be RSS <description> or Atom <summary>
# feedparser maps both to entry.summary
content = entry.summary if entry.summary else None
# Only add "Summary:" label for Atom feeds (which use <summary> tag)
if is_atom:
add_label = True
# Add content with or without label
if content:
if add_label:
item_parts.append(f'Summary:<br>{content}')
else:
item_parts.append(content)
else:
# No content - just show <none>
item_parts.append('&lt;none&gt;')
# Join all parts of this item
if item_parts:
formatted_items.append('\n'.join(item_parts))
# Wrap each item in a div with classes (first, last, item-N)
items_html = []
@@ -195,8 +122,7 @@ def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
class_str = ' '.join(classes)
items_html.append(f'<div class="{class_str}">{item}</div>')
return '<html><body>\n' + "\n<br>".join(items_html) + '\n</body></html>'
return '<html><body>\n'+"\n<br><br>".join(items_html)+'\n</body></html>'
except Exception as e:
logger.warning(f"Error formatting RSS items: {str(e)}")

View File

@@ -11,86 +11,36 @@ set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Since theres no curl installed lets roll with python3
check_sanity() {
local port="$1"
if [ -z "$port" ]; then
echo "Usage: check_sanity <port>" >&2
return 1
fi
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
done
python3 - "$port" <<'PYCODE'
import sys, time, urllib.request, socket
port = sys.argv[1]
url = f'http://localhost:{port}'
ok = False
for _ in range(6): # --retry 6
try:
r = urllib.request.urlopen(url, timeout=3).read().decode()
if 'est-url-is-sanity' in r:
ok = True
break
except (urllib.error.URLError, ConnectionRefusedError, socket.error):
time.sleep(1)
sys.exit(0 if ok else 1)
PYCODE
}
data_sanity_test () {
# Restart data sanity test
cd ..
TMPDIR=$(mktemp -d)
PORT_N=$((5000 + RANDOM % (6501 - 5000)))
./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
PID=$!
sleep 5
kill $PID
sleep 2
./changedetection.py -p $PORT_N -d $TMPDIR &
PID=$!
sleep 5
# On a restart the URL should still be there
check_sanity $PORT_N || exit 1
kill $PID
cd $OLDPWD
# datastore looks alright, continue
}
data_sanity_test
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load tests/test_*.py
#time pytest -n auto --dist loadfile -vv --tb=long tests/test_*.py
echo "RUNNING WITH BASE_URL SET"
# Now re-run some tests with BASE_URL enabled
# Re #65 - Ability to include a link back to the installation, in the notification.
export BASE_URL="https://really-unique-domain.io"
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 tests/test_notification.py
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
# Re-run with HIDE_REFERER set - could affect login
export HIDE_REFERER=True
pytest -vv -s --maxfail=1 tests/test_access_control.py
pytest tests/test_access_control.py
# Re-run a few tests that will trigger brotli based storage
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
pytest -vv -s --maxfail=1 tests/test_access_control.py
pytest tests/test_access_control.py
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
pytest -vv -s --maxfail=1 tests/test_backend.py
pytest -vv -s --maxfail=1 tests/test_rss.py
pytest -vv -s --maxfail=1 tests/test_unique_lines.py
pytest tests/test_backend.py
pytest tests/test_rss.py
pytest tests/test_unique_lines.py
# Try high concurrency
FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l
# Check file:// will pickup a file when enabled
echo "Hello world" > /tmp/test-file.txt
ALLOW_FILE_URI=yes pytest -vv -s tests/test_security.py
ALLOW_FILE_URI=yes pytest tests/test_security.py

View File

@@ -6,8 +6,6 @@
# enable debug
set -x
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
# A extra browser is configured, but we never chose to use it, so it should NOT show in the logs
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url'

View File

@@ -19,13 +19,12 @@ docker run --network changedet-network -d \
-v `pwd`/tests/proxy_list/squid-passwords.txt:/etc/squid3/passwords \
ubuntu/squid:4.13-21.10_edge
sleep 5
## 2nd test actually choose the preferred proxy from proxies.json
# This will force a request via "proxy-two"
docker run --network changedet-network \
-v `pwd`/tests/proxy_list/proxies.json-example:/tmp/proxies.json \
-v `pwd`/tests/proxy_list/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest -s tests/proxy_list/test_multiple_proxy.py --datastore-path /tmp'
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py'
set +e
echo "- Looking for chosen.changedetection.io request in squid-one - it should NOT be here"
@@ -49,10 +48,8 @@ fi
# Test the UI configurable proxies
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py --datastore-path /tmp'
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py'
# Give squid proxies a moment to flush their logs
sleep 2
# Should see a request for one.changedetection.io in there
echo "- Looking for .changedetection.io request in squid-custom"
@@ -66,10 +63,7 @@ fi
# Test "no-proxy" option
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py --datastore-path /tmp'
# Give squid proxies a moment to flush their logs
sleep 2
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py'
# We need to handle grep returning 1
set +e
@@ -86,8 +80,6 @@ for c in $(echo "squid-one squid-two squid-custom"); do
fi
done
echo "docker ps output"
docker ps
docker kill squid-one squid-two squid-custom
@@ -96,19 +88,19 @@ docker kill squid-one squid-two squid-custom
# Requests
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'
# Playwright
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
# Puppeteer fast
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
# Selenium
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py'

View File

@@ -5,7 +5,6 @@ set -e
# enable debug
set -x
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
# SOCKS5 related - start simple Socks5 proxy server
# SOCKSTEST=xyz should show in the logs of this service to confirm it fetched
@@ -15,13 +14,13 @@ docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p
echo "---------------------------------- SOCKS5 -------------------"
# SOCKS5 related - test from proxies.json
docker run --network changedet-network \
-v `pwd`/tests/proxy_socks5/proxies.json-example:/tmp/proxies.json \
-v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
--rm \
-e "FLASK_SERVER_NAME=cdio" \
--hostname cdio \
-e "SOCKSTEST=proxiesjson" \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
# SOCKS5 related - by manually entering in UI
docker run --network changedet-network \
@@ -30,18 +29,18 @@ docker run --network changedet-network \
--hostname cdio \
-e "SOCKSTEST=manual" \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py --datastore-path /tmp'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'
# SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
docker run --network changedet-network \
-e "SOCKSTEST=manual-playwright" \
--hostname cdio \
-e "FLASK_SERVER_NAME=cdio" \
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/tmp/proxies.json \
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
--rm \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
echo "socks5 server logs"
docker logs socks5proxy

View File

@@ -1,92 +1,11 @@
function setupDiffNavigation() {
var $fromSelect = $('#diff-from-version');
var $toSelect = $('#diff-to-version');
var $fromSelected = $fromSelect.find('option:selected');
var $toSelected = $toSelect.find('option:selected');
if ($fromSelected.length && $toSelected.length) {
// Find the previous pair (move both back one position)
var $prevFrom = $fromSelected.prev();
var $prevTo = $toSelected.prev();
// Find the next pair (move both forward one position)
var $nextFrom = $fromSelected.next();
var $nextTo = $toSelected.next();
// Build URL with current diff preferences
var currentParams = new URLSearchParams(window.location.search);
// Previous button: only show if both can move back
if ($prevFrom.length && $prevTo.length) {
currentParams.set('from_version', $prevFrom.val());
currentParams.set('to_version', $prevTo.val());
$('#btn-previous').attr('href', '?' + currentParams.toString());
} else {
$('#btn-previous').remove();
}
// Next button: only show if both can move forward
if ($nextFrom.length && $nextTo.length) {
currentParams.set('from_version', $nextFrom.val());
currentParams.set('to_version', $nextTo.val());
$('#btn-next').attr('href', '?' + currentParams.toString());
} else {
$('#btn-next').remove();
}
}
// Keyboard navigation
window.addEventListener('keydown', function (event) {
// Don't trigger if user is typing in an input field
if (event.target.tagName === 'INPUT' || event.target.tagName === 'TEXTAREA' || event.target.tagName === 'SELECT') {
return;
}
var $fromSelected = $fromSelect.find('option:selected');
var $toSelected = $toSelect.find('option:selected');
if ($fromSelected.length && $toSelected.length) {
if (event.key === 'ArrowLeft') {
var $prevFrom = $fromSelected.prev();
var $prevTo = $toSelected.prev();
if ($prevFrom.length && $prevTo.length) {
var prevHref = $('#btn-previous').attr('href');
if (prevHref) {
event.preventDefault();
window.location.href = prevHref;
}
}
} else if (event.key === 'ArrowRight') {
var $nextFrom = $fromSelected.next();
var $nextTo = $toSelected.next();
if ($nextFrom.length && $nextTo.length) {
var nextHref = $('#btn-next').attr('href');
if (nextHref) {
event.preventDefault();
window.location.href = nextHref;
}
}
}
}
}, false);
}
$(document).ready(function () {
$('.needs-localtime').each(function () {
for (var option of this.options) {
var dateObject = new Date(option.value * 1000);
var formattedDate = dateObject.toLocaleString(undefined, {dateStyle: "full", timeStyle: "medium"});
// Preserve any existing text in the label (like "(Previous)" or "(Current)")
var existingText = option.text.replace(option.value, '').trim();
option.label = existingText ? formattedDate + ' ' + existingText : formattedDate;
option.label = dateObject.toLocaleString(undefined, {dateStyle: "full", timeStyle: "medium"});
}
});
// Setup keyboard navigation for diff versions
if ($('#diff-from-version').length && $('#diff-to-version').length) {
setupDiffNavigation();
}
// Load it when the #screenshot tab is in use, so we dont give a slow experience when waiting for the text diff to load
window.addEventListener('hashchange', function (e) {
toggle(location.hash);
@@ -108,51 +27,16 @@ $(document).ready(function () {
}
}
const article = $('#difference')[0];
const article = $('.highlightable-filter')[0];
// We could also add the 'touchend' event for touch devices, but since
// most iOS/Android browsers already show a dialog when you select
// text (often with a Share option) we'll skip that
if (article) {
article.addEventListener('mousedown', clean, false);
}
// Because they might 'mouse up' outside the article but on the page
const d_page = $(".difference-page")[0]
if (d_page ) {
d_page.addEventListener('mouseup', dragTextHandler, false);
}
$('#highlightSnippetActions a').bind('click', function (e) {
if (!window.getSelection().toString().trim().length) {
alert('Oops no text selected!');
return;
}
$.ajax({
type: "POST",
url: highlight_submit_ignore_url,
data: {'mode': $(this).data('mode'), 'selection': window.getSelection().toString()},
statusCode: {
400: function () {
// More than likely the CSRF token was lost when the server restarted
alert("There was a problem processing the request, please reload the page.");
}
}
}).done(function (data) {
// @todo some feedback
alert("'Ignore' Filters for this watch were updated.")
clean();
}).fail(function (data) {
console.log(data);
alert('There was an error communicating with the server.');
})
});
article.addEventListener('mouseup', dragTextHandler, false);
article.addEventListener('mousedown', clean, false);
function clean(event) {
$('#bottom-horizontal-offscreen').hide();
$("#highlightSnippet").remove();
}
// Listen for Escape key press
@@ -167,9 +51,46 @@ $(document).ready(function () {
// Check if any text was selected
if (window.getSelection().toString().length > 0) {
$('#bottom-horizontal-offscreen').show();
} else {
clean();
// Find out how much (if any) user has scrolled
var scrollTop = (window.pageYOffset !== undefined) ? window.pageYOffset : (document.documentElement || document.body.parentNode || document.body).scrollTop;
// Get cursor position
const posX = event.clientX;
const posY = event.clientY + 20 + scrollTop;
// Append HTML to the body, create the "Tweet Selection" dialog
document.body.insertAdjacentHTML('beforeend', '<div id="highlightSnippet" style="position: absolute; top: ' + posY + 'px; left: ' + posX + 'px;"><div class="pure-form-message-inline" style="font-size: 70%">Ignore any change on any line which contains the selected text.</div><br><a data-mode="exact" href="javascript:void(0);" class="pure-button button-secondary button-xsmall">Ignore exact text</a>&nbsp;</div>');
if (/\d/.test(window.getSelection().toString())) {
// Offer regex replacement
document.getElementById("highlightSnippet").insertAdjacentHTML('beforeend', '<a data-mode="digit-regex" href="javascript:void(0);" class="pure-button button-secondary button-xsmall">Ignore text including number changes</a>');
}
$('#highlightSnippet a').bind('click', function (e) {
if(!window.getSelection().toString().trim().length) {
alert('Oops no text selected!');
return;
}
$.ajax({
type: "POST",
url: highlight_submit_ignore_url,
data: {'mode': $(this).data('mode'), 'selection': window.getSelection().toString()},
statusCode: {
400: function () {
// More than likely the CSRF token was lost when the server restarted
alert("There was a problem processing the request, please reload the page.");
}
}
}).done(function (data) {
$("#highlightSnippet").html(data)
}).fail(function (data) {
console.log(data);
alert('There was an error communicating with the server.');
});
});
}
}
@@ -179,12 +100,4 @@ $(document).ready(function () {
alert('Error - You are trying to compare the same version.');
}
});
// Auto-submit form on change of any input elements (checkboxes, radio buttons, dropdowns)
$('#diff-form').on('change', 'input[type="checkbox"], input[type="radio"], select', function (e) {
// Check if we're trying to compare the same version before submitting
if ($('select[name=from_version]').val() !== $('select[name=to_version]').val()) {
$('#diff-form').submit();
}
});
});

View File

@@ -1,152 +1,115 @@
$(document).ready(function () {
// Find all <span> elements inside pre#difference
var inputs = $('#difference span').toArray();
inputs.current = 0;
// Setup visual minimap of difference locations (cells are pre-built in Python)
var $visualizer = $('#cell-diff-jump-visualiser');
var $difference = $('#difference');
var $cells = $visualizer.find('> div');
var visualizerResolutionCells = $cells.length;
var cellHeight;
if ($difference.length && visualizerResolutionCells > 0) {
var docHeight = $difference[0].scrollHeight;
cellHeight = docHeight / visualizerResolutionCells;
// Add click handlers to pre-built cells
$cells.each(function(i) {
$(this).data('cellIndex', i);
$(this).on('click', function() {
var cellIndex = $(this).data('cellIndex');
var targetPositionInDifference = cellIndex * cellHeight;
var viewportHeight = $(window).height();
// Scroll so target is at viewport center (where eyes expect it)
window.scrollTo({
top: $difference.offset().top + targetPositionInDifference - (viewportHeight / 2),
behavior: "smooth"
});
});
});
}
var a = document.getElementById("a");
var b = document.getElementById("b");
var result = document.getElementById("result");
var inputs;
$('#jump-next-diff').click(function () {
if (!inputs || inputs.length === 0) return;
// Find the next change after current scroll position
var currentScrollPos = $(window).scrollTop();
var viewportHeight = $(window).height();
var currentCenter = currentScrollPos + (viewportHeight / 2);
// Add small buffer (50px) to jump past changes already near center
var searchFromPosition = currentCenter + 50;
var nextElement = null;
for (var i = 0; i < inputs.length; i++) {
var elementTop = $(inputs[i]).offset().top;
if (elementTop > searchFromPosition) {
nextElement = inputs[i];
break;
}
}
// If no element found ahead, wrap to first element
if (!nextElement) {
nextElement = inputs[0];
}
// Scroll to position the element at viewport center
var elementTop = $(nextElement).offset().top;
var targetScrollPos = elementTop - (viewportHeight / 2);
var element = inputs[inputs.current];
var headerOffset = 80;
var elementPosition = element.getBoundingClientRect().top;
var offsetPosition = elementPosition - headerOffset + window.scrollY;
window.scrollTo({
top: targetScrollPos,
top: offsetPosition,
behavior: "smooth",
});
inputs.current++;
if (inputs.current >= inputs.length) {
inputs.current = 0;
}
});
// Track current scroll position in visualizer
function updateVisualizerPosition() {
if (!$difference.length || visualizerResolutionCells === 0) return;
function changed() {
// https://github.com/kpdecker/jsdiff/issues/389
// I would love to use `{ignoreWhitespace: true}` here but it breaks the formatting
options = {
ignoreWhitespace: document.getElementById("ignoreWhitespace").checked,
};
var scrollTop = $(window).scrollTop();
var viewportHeight = $(window).height();
var viewportCenter = scrollTop + (viewportHeight / 2);
var differenceTop = $difference.offset().top;
var differenceHeight = $difference[0].scrollHeight;
var positionInDifference = viewportCenter - differenceTop;
// Handle edge case: if we're at max scroll, show last cell
// This prevents shorter documents from never reaching 100%
var maxScrollTop = $(document).height() - viewportHeight;
var isAtBottom = scrollTop >= maxScrollTop - 10; // 10px tolerance
// Calculate which cell we're currently viewing
var currentCell;
if (isAtBottom) {
currentCell = visualizerResolutionCells - 1;
} else {
currentCell = Math.floor(positionInDifference / cellHeight);
currentCell = Math.max(0, Math.min(currentCell, visualizerResolutionCells - 1));
}
// Remove previous active marker and add to current cell
$visualizer.find('> div').removeClass('current-position');
$visualizer.find('> div').eq(currentCell).addClass('current-position');
}
// Recalculate cellHeight on window resize
function handleResize() {
if ($difference.length) {
var docHeight = $difference[0].scrollHeight;
cellHeight = docHeight / visualizerResolutionCells;
updateVisualizerPosition();
}
}
// Debounce scroll and resize events to reduce CPU usage
$(window).on('scroll', updateVisualizerPosition.debounce(5));
$(window).on('resize', handleResize.debounce(100));
// Initial scroll to specific line if requested
if (typeof initialScrollToLineNumber !== 'undefined' && initialScrollToLineNumber !== null && $difference.length) {
// Convert line number to text position and scroll to it
var diffText = $difference.text();
var lines = diffText.split('\n');
if (initialScrollToLineNumber > 0 && initialScrollToLineNumber <= lines.length) {
// Calculate character position of the target line
var charPosition = 0;
for (var i = 0; i < initialScrollToLineNumber - 1; i++) {
charPosition += lines[i].length + 1; // +1 for newline
var diff = Diff[window.diffType](a.textContent, b.textContent, options);
var fragment = document.createDocumentFragment();
for (var i = 0; i < diff.length; i++) {
if (diff[i].added && diff[i + 1] && diff[i + 1].removed) {
var swap = diff[i];
diff[i] = diff[i + 1];
diff[i + 1] = swap;
}
// Estimate vertical position based on average line height
var totalChars = diffText.length;
var totalHeight = $difference[0].scrollHeight;
var estimatedTop = (charPosition / totalChars) * totalHeight;
// Scroll to position with line at viewport center
var viewportHeight = $(window).height();
setTimeout(function() {
window.scrollTo({
top: $difference.offset().top + estimatedTop - (viewportHeight / 2),
behavior: "smooth"
});
}, 100); // Small delay to ensure page is fully loaded
var node;
if (diff[i].removed) {
node = document.createElement("del");
node.classList.add("change");
const wrapper = node.appendChild(document.createElement("span"));
wrapper.appendChild(document.createTextNode(diff[i].value));
} else if (diff[i].added) {
node = document.createElement("ins");
node.classList.add("change");
const wrapper = node.appendChild(document.createElement("span"));
wrapper.appendChild(document.createTextNode(diff[i].value));
} else {
node = document.createTextNode(diff[i].value);
}
fragment.appendChild(node);
}
result.textContent = "";
result.appendChild(fragment);
// For nice mouse-over hover/title information
const removed_current_option = $('#diff-version option:selected')
if (removed_current_option) {
$('del').each(function () {
$(this).prop('title', 'Removed '+removed_current_option[0].label);
});
}
const inserted_current_option = $('#current-version option:selected')
if (removed_current_option) {
$('ins').each(function () {
$(this).prop('title', 'Inserted '+inserted_current_option[0].label);
});
}
// Set the list of possible differences to jump to
inputs = document.querySelectorAll('#diff-ui .change')
// Set the "current" diff pointer
inputs.current = 0;
// Goto diff
$('#jump-next-diff').click();
}
// Initial position update
if ($difference.length && cellHeight) {
updateVisualizerPosition();
onDiffTypeChange(
document.querySelector('#settings [name="diff_type"]:checked'),
);
changed();
a.onpaste = a.onchange = b.onpaste = b.onchange = changed;
if ("oninput" in a) {
a.oninput = b.oninput = changed;
} else {
a.onkeyup = b.onkeyup = changed;
}
function changed() {
//$('#jump-next-diff').click();
function onDiffTypeChange(radio) {
window.diffType = radio.value;
// Not necessary
// document.title = "Diff " + radio.value.slice(4);
}
var radio = document.getElementsByName("diff_type");
for (var i = 0; i < radio.length; i++) {
radio[i].onchange = function (e) {
onDiffTypeChange(e.target);
changed();
};
}
document.getElementById("ignoreWhitespace").onchange = function (e) {
changed();
};
});

38
changedetectionio/static/js/diff.min.js vendored Normal file

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More