mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-01-06 15:20:22 +00:00
Compare commits
113 Commits
regex-filt
...
i18n-2026
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
641300578b | ||
|
|
4c90fd185f | ||
|
|
40dc3fef7e | ||
|
|
5f4998960d | ||
|
|
7a515c4202 | ||
|
|
48e21226a1 | ||
|
|
cdf34bf614 | ||
|
|
a94560190f | ||
|
|
fefaf40514 | ||
|
|
6f66c39628 | ||
|
|
eb0f83b45b | ||
|
|
f2284f7a9b | ||
|
|
4b0ad525f3 | ||
|
|
a748a43224 | ||
|
|
acfcaf42d4 | ||
|
|
6158bb48b8 | ||
|
|
d4fc1a3b6e | ||
|
|
f39b5e5a46 | ||
|
|
30ba603956 | ||
|
|
3147c5a3e2 | ||
|
|
f599efacab | ||
|
|
d7dbc50d70 | ||
|
|
51bb358ea7 | ||
|
|
fe4df1d41f | ||
|
|
39274f121c | ||
|
|
4b1d871078 | ||
|
|
f78c2dcffd | ||
|
|
1c2c22b8df | ||
|
|
3276a9347a | ||
|
|
d763bb4267 | ||
|
|
be3c9892e0 | ||
|
|
0be5005776 | ||
|
|
12ce03c0bb | ||
|
|
3767a2d5b9 | ||
|
|
71c8d8b1b1 | ||
|
|
20cbe6f510 | ||
|
|
3a6e1f908f | ||
|
|
73fdbf24e3 | ||
|
|
629f939224 | ||
|
|
48299e5738 | ||
|
|
5b1b70b8ab | ||
|
|
678d568b37 | ||
|
|
fb15b62fb9 | ||
|
|
8dc39d4a3d | ||
|
|
805cd618d4 | ||
|
|
4ba5fcce8f | ||
|
|
b9305faf21 | ||
|
|
3d3b53831e | ||
|
|
2ae29ab78f | ||
|
|
caffd804fe | ||
|
|
c58a97f69d | ||
|
|
e2b407c6f3 | ||
|
|
d65a2c784d | ||
|
|
9bc812a167 | ||
|
|
fd2080567d | ||
|
|
969c75e7be | ||
|
|
4b14cec5f4 | ||
|
|
a8d5ea067d | ||
|
|
2f6873f7d5 | ||
|
|
dfa85ab932 | ||
|
|
72ec8d8aaa | ||
|
|
ee824c856b | ||
|
|
c2ee1c582a | ||
|
|
374649cc10 | ||
|
|
f638bc6332 | ||
|
|
5742ad6fab | ||
|
|
0022201918 | ||
|
|
2e4f40b172 | ||
|
|
80b614afa1 | ||
|
|
d18029ffe4 | ||
|
|
9a44509134 | ||
|
|
33ab4c8891 | ||
|
|
e1028f822d | ||
|
|
ae1cd61e61 | ||
|
|
a5fe1a771f | ||
|
|
b0980f45b8 | ||
|
|
afadaf5467 | ||
|
|
db11f601a1 | ||
|
|
ef04840dd2 | ||
|
|
1628586553 | ||
|
|
a23c07ba94 | ||
|
|
431fd168a1 | ||
|
|
7dbd0b75b2 | ||
|
|
ae532c82e8 | ||
|
|
ab0b85d088 | ||
|
|
66aec365c2 | ||
|
|
e09cea60ef | ||
|
|
f304ae19db | ||
|
|
2116b2cb93 | ||
|
|
8f580ac96b | ||
|
|
a8cadc3d16 | ||
|
|
c9290d73e0 | ||
|
|
2db5e906e9 | ||
|
|
0751bd371a | ||
|
|
3ffa0805e9 | ||
|
|
3335270692 | ||
|
|
a7573b10ec | ||
|
|
df945ad743 | ||
|
|
4536e95205 | ||
|
|
1479d7bd46 | ||
|
|
9ba2094f75 | ||
|
|
8aa012ba8e | ||
|
|
8bc6b10db1 | ||
|
|
76d799c95b | ||
|
|
7c8bdfcc9f | ||
|
|
01a938d7ce | ||
|
|
e44853c439 | ||
|
|
3830bec891 | ||
|
|
88ab663330 | ||
|
|
68335b95c3 | ||
|
|
7bbfa0ef32 | ||
|
|
e233d52931 | ||
|
|
181d32e82a |
51
.github/actions/extract-memory-report/action.yml
vendored
Normal file
51
.github/actions/extract-memory-report/action.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
name: 'Extract Memory Test Report'
|
||||
description: 'Extracts and displays memory test report from a container'
|
||||
inputs:
|
||||
container-name:
|
||||
description: 'Name of the container to extract logs from'
|
||||
required: true
|
||||
python-version:
|
||||
description: 'Python version for artifact naming'
|
||||
required: true
|
||||
output-dir:
|
||||
description: 'Directory to store output logs'
|
||||
required: false
|
||||
default: 'output-logs'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Create output directory
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p ${{ inputs.output-dir }}
|
||||
|
||||
- name: Dump container log
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Disabled for now"
|
||||
# return
|
||||
# docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout"
|
||||
# docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr"
|
||||
|
||||
- name: Extract and display memory test report
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Disabled for now"
|
||||
# echo "Extracting test-memory.log from container..."
|
||||
# docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container"
|
||||
#
|
||||
# echo "=== Top 10 Highest Peak Memory Tests ==="
|
||||
# if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then
|
||||
# grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \
|
||||
# sed 's/.*Peak memory: //' | \
|
||||
# paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \
|
||||
# sort -t'|' -k1 -nr | \
|
||||
# cut -d'|' -f2 | \
|
||||
# head -10
|
||||
# echo ""
|
||||
# echo "=== Full Memory Test Report ==="
|
||||
# cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log
|
||||
# else
|
||||
# echo "No memory log available"
|
||||
# fi
|
||||
16
.github/test/Dockerfile-alpine
vendored
16
.github/test/Dockerfile-alpine
vendored
@@ -7,6 +7,8 @@ ENV PYTHONUNBUFFERED=1
|
||||
|
||||
COPY requirements.txt /requirements.txt
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
RUN \
|
||||
apk add --update --no-cache --virtual=build-dependencies \
|
||||
build-base \
|
||||
@@ -27,7 +29,19 @@ RUN \
|
||||
file \
|
||||
nodejs \
|
||||
poppler-utils \
|
||||
python3 && \
|
||||
python3 \
|
||||
glib \
|
||||
libsm \
|
||||
libxext \
|
||||
libxrender && \
|
||||
case "$TARGETPLATFORM" in \
|
||||
linux/arm/v7|linux/arm/v8) \
|
||||
echo "INFO: Skipping py3-opencv on $TARGETPLATFORM (using pixelmatch fallback)" \
|
||||
;; \
|
||||
*) \
|
||||
apk add --update --no-cache py3-opencv || echo "WARN: py3-opencv install failed, using pixelmatch fallback" \
|
||||
;; \
|
||||
esac && \
|
||||
echo "**** pip3 install test of changedetection.io ****" && \
|
||||
python3 -m venv /lsiopy && \
|
||||
pip install -U pip wheel setuptools && \
|
||||
|
||||
2
.github/workflows/codeql-analysis.yml
vendored
2
.github/workflows/codeql-analysis.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
|
||||
15
.github/workflows/containers.yml
vendored
15
.github/workflows/containers.yml
vendored
@@ -15,6 +15,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- dev
|
||||
|
||||
jobs:
|
||||
metadata:
|
||||
@@ -39,12 +40,20 @@ jobs:
|
||||
# Or if we are in a tagged release scenario.
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: 3.11
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
@@ -84,10 +93,10 @@ jobs:
|
||||
version: latest
|
||||
driver-opts: image=moby/buildkit:master
|
||||
|
||||
# master branch -> :dev container tag
|
||||
# dev branch -> :dev container tag
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref }} == "refs/heads/master"
|
||||
if: ${{ github.ref == 'refs/heads/dev' }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
|
||||
8
.github/workflows/pypi-release.yml
vendored
8
.github/workflows/pypi-release.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v5
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v5
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
12
.github/workflows/test-container-build.yml
vendored
12
.github/workflows/test-container-build.yml
vendored
@@ -44,12 +44,20 @@ jobs:
|
||||
- platform: linux/arm64
|
||||
dockerfile: ./.github/test/Dockerfile-alpine
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: 3.11
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
@@ -74,5 +82,5 @@ jobs:
|
||||
file: ${{ matrix.dockerfile }}
|
||||
platforms: ${{ matrix.platform }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=min
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
|
||||
9
.github/workflows/test-only.yml
vendored
9
.github/workflows/test-only.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
||||
lint-code:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Lint with Ruff
|
||||
run: |
|
||||
pip install ruff
|
||||
@@ -21,6 +21,8 @@ jobs:
|
||||
python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"
|
||||
|
||||
test-application-3-10:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
@@ -28,12 +30,15 @@ jobs:
|
||||
|
||||
|
||||
test-application-3-11:
|
||||
# Always run
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
test-application-3-12:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
@@ -41,6 +46,8 @@ jobs:
|
||||
skip-pypuppeteer: true
|
||||
|
||||
test-application-3-13:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
|
||||
472
.github/workflows/test-stack-reusable-workflow.yml
vendored
472
.github/workflows/test-stack-reusable-workflow.yml
vendored
@@ -15,141 +15,294 @@ on:
|
||||
default: false
|
||||
|
||||
jobs:
|
||||
test-application:
|
||||
# Build the Docker image once and share it with all test jobs
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# Mainly just for link/flake8
|
||||
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
run: |
|
||||
echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
# Build a changedetection.io container and start testing inside
|
||||
docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
|
||||
# Debug info
|
||||
docker run test-changedetectionio bash -c 'pip list'
|
||||
docker run test-changedetectionio bash -c 'pip list'
|
||||
|
||||
- name: We should be Python ${{ env.PYTHON_VERSION }} ...
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'python3 --version'
|
||||
|
||||
- name: Spin up ancillary testable services
|
||||
run: |
|
||||
|
||||
docker network create changedet-network
|
||||
|
||||
# Selenium
|
||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
|
||||
|
||||
# SocketPuppetBrowser + Extra for custom browser test
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run test-changedetectionio bash -c 'python3 --version'
|
||||
|
||||
- name: Spin up ancillary SMTP+Echo message test server
|
||||
- name: Save Docker image
|
||||
run: |
|
||||
# Debug SMTP server/echo message back server, telnet 11080 to it should immediately bounce back the most recent message that tried to send (then you can see if cdio tried to send, the format, etc)
|
||||
# 11025 is the SMTP port for testing
|
||||
# apprise example would be 'mailto://changedetection@localhost:11025/?to=fff@home.com (it will also echo to STDOUT)
|
||||
# telnet localhost 11080
|
||||
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
|
||||
docker ps
|
||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Show docker container state and other debug info
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/test-changedetectionio.tar
|
||||
retention-days: 1
|
||||
|
||||
# Unit tests (lightweight, no ancillary services needed)
|
||||
unit-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
set -x
|
||||
echo "Running processes in docker..."
|
||||
docker ps
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Run Unit Tests
|
||||
run: |
|
||||
# Unit tests
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
|
||||
- name: Test built container with Pytest (generally as requests/plaintext fetching)
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 25
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
# All tests
|
||||
echo "run test with pytest"
|
||||
# The default pytest logger_level is TRACE
|
||||
# To change logger_level for pytest(test/conftest.py),
|
||||
# append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG'
|
||||
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
# PLAYWRIGHT/NODE-> CDP
|
||||
- name: Playwright and SocketPuppetBrowser - Specific tests in built container
|
||||
- name: Test built container with Pytest
|
||||
run: |
|
||||
# Playwright via Sockpuppetbrowser fetch
|
||||
# tests/visualselector/test_fetch_data.py will do browser steps
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
|
||||
|
||||
- name: Extract memory report and logs
|
||||
if: always()
|
||||
uses: ./.github/actions/extract-memory-report
|
||||
with:
|
||||
container-name: test-cdio-basic-tests
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Playwright and SocketPuppetBrowser - Headers and requests
|
||||
run: |
|
||||
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
|
||||
- name: Store test artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Playwright and SocketPuppetBrowser - Restock detection
|
||||
run: |
|
||||
# restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
|
||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||
# Playwright tests
|
||||
playwright-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# STRAIGHT TO CDP
|
||||
- name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
|
||||
if: ${{ inputs.skip-pypuppeteer == false }}
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
# Playwright via Sockpuppetbrowser fetch
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
|
||||
if: ${{ inputs.skip-pypuppeteer == false }}
|
||||
- name: Spin up ancillary services
|
||||
run: |
|
||||
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
- name: Pyppeteer and SocketPuppetBrowser - Restock detection
|
||||
if: ${{ inputs.skip-pypuppeteer == false }}
|
||||
run: |
|
||||
# restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
|
||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||
- name: Playwright - Specific tests in built container
|
||||
run: |
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||
|
||||
- name: Playwright - Headers and requests
|
||||
run: |
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
|
||||
|
||||
- name: Playwright - Restock detection
|
||||
run: |
|
||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||
|
||||
# Pyppeteer tests
|
||||
pyppeteer-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
if: ${{ inputs.skip-pypuppeteer == false }}
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up ancillary services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
- name: Pyppeteer - Specific tests in built container
|
||||
run: |
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||
|
||||
- name: Pyppeteer - Headers and requests checks
|
||||
run: |
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
|
||||
- name: Pyppeteer - Restock detection
|
||||
run: |
|
||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||
|
||||
# Selenium tests
|
||||
selenium-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up ancillary services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
|
||||
sleep 3
|
||||
|
||||
- name: Specific tests for headers and requests checks with Selenium
|
||||
run: |
|
||||
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
|
||||
# SELENIUM
|
||||
- name: Specific tests in built container for Selenium
|
||||
run: |
|
||||
# Selenium fetch
|
||||
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
|
||||
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
|
||||
|
||||
- name: Specific tests in built container for headers and requests checks with Selenium
|
||||
|
||||
# SMTP tests
|
||||
smtp-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up SMTP test server
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
|
||||
|
||||
# OTHER STUFF
|
||||
- name: Test SMTP notification mime types
|
||||
run: |
|
||||
# SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above
|
||||
# "mailserver" hostname defined above
|
||||
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
|
||||
|
||||
# @todo Add a test via playwright/puppeteer
|
||||
# squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py
|
||||
- name: Test proxy squid style interaction
|
||||
# Proxy tests
|
||||
proxy-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
- name: Test proxy Squid style interaction
|
||||
run: |
|
||||
cd changedetectionio
|
||||
./run_proxy_tests.sh
|
||||
docker ps
|
||||
cd ..
|
||||
|
||||
- name: Test proxy SOCKS5 style interaction
|
||||
@@ -158,28 +311,65 @@ jobs:
|
||||
./run_socks_proxy_tests.sh
|
||||
cd ..
|
||||
|
||||
# Custom browser URL tests
|
||||
custom-browser-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up ancillary services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
- name: Test custom browser URL
|
||||
run: |
|
||||
cd changedetectionio
|
||||
./run_custom_browser_url_tests.sh
|
||||
cd ..
|
||||
|
||||
- name: Test changedetection.io container starts+runs basically without error
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Test container starts+runs basically without error
|
||||
run: |
|
||||
docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
|
||||
sleep 3
|
||||
# Should return 0 (no error) when grep finds it
|
||||
curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
|
||||
|
||||
# and IPv6
|
||||
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
||||
|
||||
# Check whether TRACE log is enabled.
|
||||
# Also, check whether TRACE came from STDOUT
|
||||
curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
|
||||
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
||||
docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
|
||||
# Check whether DEBUG is came from STDOUT
|
||||
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
|
||||
|
||||
docker kill test-changedetectionio
|
||||
|
||||
- name: Test HTTPS SSL mode
|
||||
@@ -187,102 +377,66 @@ jobs:
|
||||
openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost"
|
||||
docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio
|
||||
sleep 3
|
||||
# Should return 0 (no error) when grep finds it
|
||||
# -k because its self-signed
|
||||
curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid
|
||||
|
||||
docker kill test-changedetectionio-ssl
|
||||
|
||||
- name: Test IPv6 Mode
|
||||
run: |
|
||||
# IPv6 - :: bind to all interfaces inside container (like 0.0.0.0), ::1 would be localhost only
|
||||
docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio
|
||||
sleep 3
|
||||
# Should return 0 (no error) when grep finds it on localhost
|
||||
curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid
|
||||
docker kill test-changedetectionio-ipv6
|
||||
|
||||
- name: Test changedetection.io SIGTERM and SIGINT signal shutdown
|
||||
# Signal tests
|
||||
signal-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Test SIGTERM and SIGINT signal shutdown
|
||||
run: |
|
||||
|
||||
echo SIGINT Shutdown request test
|
||||
docker run --name sig-test -d test-changedetectionio
|
||||
sleep 3
|
||||
echo ">>> Sending SIGINT to sig-test container"
|
||||
docker kill --signal=SIGINT sig-test
|
||||
sleep 3
|
||||
# invert the check (it should be not 0/not running)
|
||||
docker ps
|
||||
# check signal catch(STDERR) log. Because of
|
||||
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
|
||||
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
|
||||
test -z "`docker ps|grep sig-test`"
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Looks like container was running when it shouldnt be"
|
||||
docker ps
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# @todo - scan the container log to see the right "graceful shutdown" text exists
|
||||
docker rm sig-test
|
||||
|
||||
|
||||
echo SIGTERM Shutdown request test
|
||||
docker run --name sig-test -d test-changedetectionio
|
||||
sleep 3
|
||||
echo ">>> Sending SIGTERM to sig-test container"
|
||||
docker kill --signal=SIGTERM sig-test
|
||||
sleep 3
|
||||
# invert the check (it should be not 0/not running)
|
||||
docker ps
|
||||
# check signal catch(STDERR) log. Because of
|
||||
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
|
||||
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
|
||||
test -z "`docker ps|grep sig-test`"
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Looks like container was running when it shouldnt be"
|
||||
docker ps
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# @todo - scan the container log to see the right "graceful shutdown" text exists
|
||||
docker rm sig-test
|
||||
|
||||
- name: Dump container log
|
||||
if: always()
|
||||
run: |
|
||||
mkdir output-logs
|
||||
docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
|
||||
docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
|
||||
|
||||
- name: Extract and display memory test report
|
||||
if: always()
|
||||
run: |
|
||||
# Extract test-memory.log from the container
|
||||
echo "Extracting test-memory.log from container..."
|
||||
docker cp test-cdio-basic-tests:/app/changedetectionio/test-memory.log output-logs/test-memory-${{ env.PYTHON_VERSION }}.log || echo "test-memory.log not found in container"
|
||||
|
||||
# Display the memory log contents for immediate visibility in workflow output
|
||||
echo "=== Top 10 Highest Peak Memory Tests ==="
|
||||
if [ -f output-logs/test-memory-${{ env.PYTHON_VERSION }}.log ]; then
|
||||
# Sort by peak memory value (extract number before MB and sort numerically, reverse order)
|
||||
grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log | \
|
||||
sed 's/.*Peak memory: //' | \
|
||||
paste -d'|' - <(grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log) | \
|
||||
sort -t'|' -k1 -nr | \
|
||||
cut -d'|' -f2 | \
|
||||
head -10
|
||||
echo ""
|
||||
echo "=== Full Memory Test Report ==="
|
||||
cat output-logs/test-memory-${{ env.PYTHON_VERSION }}.log
|
||||
else
|
||||
echo "No memory log available"
|
||||
fi
|
||||
|
||||
- name: Store everything including test-datastore
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: .
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -21,6 +21,7 @@ venv/
|
||||
# IDEs
|
||||
.idea
|
||||
.vscode/settings.json
|
||||
*~
|
||||
|
||||
# Datastore files
|
||||
datastore/
|
||||
|
||||
57
Dockerfile
57
Dockerfile
@@ -34,23 +34,46 @@ ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
|
||||
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
|
||||
# Additional environment variables for cryptography Rust build
|
||||
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
|
||||
RUN --mount=type=cache,target=/tmp/pip-cache \
|
||||
pip install \
|
||||
--extra-index-url https://www.piwheels.org/simple \
|
||||
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
-r /requirements.txt
|
||||
|
||||
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
pip install \
|
||||
--prefer-binary \
|
||||
--extra-index-url https://www.piwheels.org/simple \
|
||||
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
-r /requirements.txt
|
||||
|
||||
# Playwright is an alternative to Selenium
|
||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
||||
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
|
||||
RUN --mount=type=cache,target=/tmp/pip-cache \
|
||||
pip install \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
playwright~=1.48.0 \
|
||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
pip install \
|
||||
--prefer-binary \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
playwright~=1.56.0 \
|
||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||
|
||||
# OpenCV is optional for fast image comparison (pixelmatch is the fallback)
|
||||
# Skip on arm/v7 and arm/v8 where builds take weeks - excluded from requirements.txt
|
||||
ARG TARGETPLATFORM
|
||||
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
case "$TARGETPLATFORM" in \
|
||||
linux/arm/v7|linux/arm/v8) \
|
||||
echo "INFO: Skipping OpenCV on $TARGETPLATFORM (build takes too long), using pixelmatch fallback" \
|
||||
;; \
|
||||
*) \
|
||||
pip install \
|
||||
--prefer-binary \
|
||||
--extra-index-url https://www.piwheels.org/simple \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
opencv-python-headless>=4.8.0.76 \
|
||||
|| echo "WARN: OpenCV install failed, will use pixelmatch fallback" \
|
||||
;; \
|
||||
esac
|
||||
|
||||
|
||||
# Final image stage
|
||||
FROM python:${PYTHON_VERSION}-slim-bookworm
|
||||
@@ -65,6 +88,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
# favicon type detection and other uses
|
||||
file \
|
||||
zlib1g \
|
||||
# OpenCV dependencies for image processing
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
libxrender-dev \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
@@ -85,6 +113,9 @@ EXPOSE 5000
|
||||
# The actual flask app module
|
||||
COPY changedetectionio /app/changedetectionio
|
||||
|
||||
# Compile translation files for i18n support
|
||||
RUN pybabel compile -d /app/changedetectionio/translations
|
||||
|
||||
# Also for OpenAPI validation wrapper - needs the YML
|
||||
RUN [ ! -d "/app/docs" ] && mkdir /app/docs
|
||||
COPY docs/api-spec.yaml /app/docs/api-spec.yaml
|
||||
|
||||
@@ -14,7 +14,7 @@ Ideal for monitoring price changes, content edits, conditional changes and more.
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)
|
||||
|
||||
|
||||
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
||||
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
||||
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ Available when connected to a <a href="https://github.com/dgtlmoon/changedetecti
|
||||
|
||||
### Perform interactive browser steps
|
||||
|
||||
Fill in text boxes, click buttons and more, setup your changedetection scenario.
|
||||
Fill in text boxes, click buttons and more, setup your changedetection scenario.
|
||||
|
||||
Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches.
|
||||
|
||||
@@ -54,7 +54,7 @@ Requires Playwright to be enabled.
|
||||
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
|
||||
- COVID related news from government websites
|
||||
- University/organisation news from their website
|
||||
- Detect and monitor changes in JSON API responses
|
||||
- Detect and monitor changes in JSON API responses
|
||||
- JSON API monitoring and alerting
|
||||
- Changes in legal and other documents
|
||||
- Trigger API calls via notifications when text appears on a website
|
||||
@@ -86,7 +86,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
||||
|
||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
|
||||
|
||||
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.
|
||||
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residential, ISP, Rotating and many other proxy types to suit your project.
|
||||
|
||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
||||
|
||||
@@ -106,4 +106,3 @@ $ changedetection.io -d /path/to/empty/data/dir -p 5000
|
||||
Then visit http://127.0.0.1:5000 , You should now be able to access the UI.
|
||||
|
||||
See https://changedetection.io for more information.
|
||||
|
||||
|
||||
@@ -64,7 +64,7 @@ def count_words_in_history(watch):
|
||||
return 0
|
||||
|
||||
latest_key = list(watch.history.keys())[-1]
|
||||
latest_content = watch.get_history_snapshot(latest_key)
|
||||
latest_content = watch.get_history_snapshot(timestamp=latest_key)
|
||||
return len(latest_content.split())
|
||||
except Exception as e:
|
||||
logger.error(f"Error counting words: {str(e)}")
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
__version__ = '0.50.29'
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.51.4'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
import logging
|
||||
import os
|
||||
import getopt
|
||||
import platform
|
||||
@@ -19,6 +20,57 @@ from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
from loguru import logger
|
||||
|
||||
# ==============================================================================
|
||||
# Multiprocessing Configuration - CRITICAL for Thread Safety
|
||||
# ==============================================================================
|
||||
#
|
||||
# PROBLEM: Python 3.12+ warns about fork() with multi-threaded processes:
|
||||
# "This process is multi-threaded, use of fork() may lead to deadlocks"
|
||||
#
|
||||
# WHY IT'S DANGEROUS:
|
||||
# 1. This Flask app has multiple threads (HTTP handlers, workers, SocketIO)
|
||||
# 2. fork() copies ONLY the calling thread to the child process
|
||||
# 3. BUT fork() also copies all locks/mutexes in their current state
|
||||
# 4. If another thread held a lock during fork() → child has locked lock with no owner
|
||||
# 5. Result: PERMANENT DEADLOCK if child tries to acquire that lock
|
||||
#
|
||||
# SOLUTION: Use 'spawn' instead of 'fork'
|
||||
# - spawn starts a fresh Python interpreter (no inherited threads or locks)
|
||||
# - Slower (~200ms vs ~1ms) but safe with multi-threaded parent
|
||||
# - Consistent across all platforms (Windows already uses spawn by default)
|
||||
#
|
||||
# IMPLEMENTATION:
|
||||
# 1. Explicit contexts everywhere (primary protection):
|
||||
# - Watch.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - playwright.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - puppeteer.py: ctx = multiprocessing.get_context('spawn')
|
||||
#
|
||||
# 2. Global default (defense-in-depth, below):
|
||||
# - Safety net if future code forgets explicit context
|
||||
# - Protects against third-party libraries using Process()
|
||||
# - Costs nothing (explicit contexts always override it)
|
||||
#
|
||||
# WHY BOTH?
|
||||
# - Explicit contexts: Clear, self-documenting, always works
|
||||
# - Global default: Safety net for forgotten contexts or library code
|
||||
# - If someone writes "Process()" instead of "ctx.Process()", still safe!
|
||||
#
|
||||
# See: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import sys
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
try:
|
||||
if multiprocessing.get_start_method(allow_none=True) is None:
|
||||
multiprocessing.set_start_method('spawn', force=False)
|
||||
logger.debug("Set multiprocessing default to 'spawn' for thread safety (explicit contexts used everywhere)")
|
||||
except RuntimeError:
|
||||
logger.debug(f"Multiprocessing start method already set: {multiprocessing.get_start_method()}")
|
||||
|
||||
# Only global so we can access it in the signal handler
|
||||
app = None
|
||||
datastore = None
|
||||
@@ -74,6 +126,12 @@ def main():
|
||||
|
||||
datastore_path = None
|
||||
do_cleanup = False
|
||||
# Optional URL to watch since start
|
||||
default_url = None
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
include_default_watches = True
|
||||
|
||||
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
|
||||
port = int(os.environ.get('PORT', 5000))
|
||||
ssl_mode = False
|
||||
@@ -87,15 +145,13 @@ def main():
|
||||
datastore_path = os.path.join(os.getcwd(), "../datastore")
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:", "port")
|
||||
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
|
||||
sys.exit(2)
|
||||
|
||||
create_datastore_dir = False
|
||||
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
# Set a logger level via shell env variable
|
||||
# Used: Dockerfile for CICD
|
||||
# To set logger level for pytest, see the app function in tests/conftest.py
|
||||
@@ -116,6 +172,10 @@ def main():
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
if opt == '-u':
|
||||
default_url = arg
|
||||
include_default_watches = False
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
@@ -157,6 +217,11 @@ def main():
|
||||
" WARNING, ERROR, CRITICAL")
|
||||
sys.exit(2)
|
||||
|
||||
# Disable verbose pyppeteer logging to prevent memory leaks from large CDP messages
|
||||
# Set both parent and child loggers since pyppeteer hardcodes DEBUG level
|
||||
logging.getLogger('pyppeteer.connection').setLevel(logging.WARNING)
|
||||
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
|
||||
@@ -172,13 +237,20 @@ def main():
|
||||
sys.exit(2)
|
||||
|
||||
try:
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__, include_default_watches=include_default_watches)
|
||||
except JSONDecodeError as e:
|
||||
# Dont' start if the JSON DB looks corrupt
|
||||
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
|
||||
logger.critical(str(e))
|
||||
return
|
||||
|
||||
# Inject datastore into plugins that need access to settings
|
||||
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
|
||||
inject_datastore_into_plugins(datastore)
|
||||
|
||||
if default_url:
|
||||
datastore.add_watch(url = default_url)
|
||||
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Get the SocketIO instance from the Flask app (created in flask_app.py)
|
||||
|
||||
@@ -1,9 +1,22 @@
|
||||
import os
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from flask_restful import abort, Resource
|
||||
from flask import request
|
||||
import validators
|
||||
from functools import wraps
|
||||
from . import auth, validate_openapi_request
|
||||
from ..validate_url import is_safe_valid_url
|
||||
|
||||
|
||||
def default_content_type(content_type='text/plain'):
|
||||
"""Decorator to set a default Content-Type header if none is provided."""
|
||||
def decorator(f):
|
||||
@wraps(f)
|
||||
def wrapper(*args, **kwargs):
|
||||
if not request.content_type:
|
||||
# Set default content type in the request environment
|
||||
request.environ['CONTENT_TYPE'] = content_type
|
||||
return f(*args, **kwargs)
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
|
||||
class Import(Resource):
|
||||
@@ -12,6 +25,7 @@ class Import(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@default_content_type('text/plain') #3547 #3542
|
||||
@validate_openapi_request('importWatches')
|
||||
def post(self):
|
||||
"""Import a list of watched URLs."""
|
||||
@@ -35,14 +49,13 @@ class Import(Resource):
|
||||
|
||||
urls = request.get_data().decode('utf8').splitlines()
|
||||
added = []
|
||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
if not len(url):
|
||||
continue
|
||||
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
if not validators.url(url, simple_host=allow_simplehost):
|
||||
if not is_safe_valid_url(url):
|
||||
return f"Invalid or unsupported URL - {url}", 400
|
||||
|
||||
if dedupe and self.datastore.url_exists(url):
|
||||
|
||||
@@ -1,17 +1,20 @@
|
||||
import os
|
||||
from changedetectionio.strtobool import strtobool
|
||||
|
||||
from flask_expects_json import expects_json
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from flask_restful import abort, Resource
|
||||
from flask import request, make_response, send_from_directory
|
||||
import validators
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
|
||||
from . import auth
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
from changedetectionio import worker_handler
|
||||
from flask import request, make_response, send_from_directory
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
import copy
|
||||
|
||||
# Import schemas from __init__.py
|
||||
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
|
||||
from ..notification import valid_notification_formats
|
||||
from ..notification.handler import newline_re
|
||||
|
||||
|
||||
def validate_time_between_check_required(json_data):
|
||||
@@ -121,7 +124,64 @@ class Watch(Resource):
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
watch.update(request.json)
|
||||
# XSS etc protection
|
||||
if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
|
||||
return "Invalid URL", 400
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
from changedetectionio import processors
|
||||
processor_config_data = {}
|
||||
regular_data = {}
|
||||
|
||||
for key, value in request.json.items():
|
||||
if key.startswith('processor_config_'):
|
||||
config_key = key.replace('processor_config_', '')
|
||||
if value: # Only save non-empty values
|
||||
processor_config_data[config_key] = value
|
||||
else:
|
||||
regular_data[key] = value
|
||||
|
||||
# Update watch with regular (non-processor-config) fields
|
||||
watch.update(regular_data)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = request.json.get('processor', watch.get('processor'))
|
||||
if processor_name:
|
||||
# Create a processor instance to access config methods
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
processor_instance = difference_detection_processor(self.datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"API: Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"API: Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"API: Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"API: Failed to save processor config: {e}")
|
||||
|
||||
return "OK", 200
|
||||
|
||||
@@ -171,12 +231,120 @@ class WatchSingleHistory(Resource):
|
||||
response = make_response("No content found", 404)
|
||||
response.mimetype = "text/plain"
|
||||
else:
|
||||
content = watch.get_history_snapshot(timestamp)
|
||||
content = watch.get_history_snapshot(timestamp=timestamp)
|
||||
response = make_response(content, 200)
|
||||
response.mimetype = "text/plain"
|
||||
|
||||
return response
|
||||
|
||||
class WatchHistoryDiff(Resource):
|
||||
"""
|
||||
Generate diff between two historical snapshots.
|
||||
|
||||
Note: This API endpoint currently returns text-based diffs and works best
|
||||
with the text_json_diff processor. Future processor types (like image_diff,
|
||||
restock_diff) may want to implement their own specialized API endpoints
|
||||
for returning processor-specific data (e.g., price charts, image comparisons).
|
||||
|
||||
The web UI diff page (/diff/<uuid>) is processor-aware and delegates rendering
|
||||
to processors/{type}/difference.py::render() for processor-specific visualizations.
|
||||
"""
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchHistoryDiff')
|
||||
def get(self, uuid, from_timestamp, to_timestamp):
|
||||
"""Generate diff between two historical snapshots."""
|
||||
from changedetectionio import diff
|
||||
from changedetectionio.notification.handler import apply_service_tweaks
|
||||
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404, message=f"No watch exists with the UUID of {uuid}")
|
||||
|
||||
if not len(watch.history):
|
||||
abort(404, message=f"Watch found but no history exists for the UUID {uuid}")
|
||||
|
||||
history_keys = list(watch.history.keys())
|
||||
|
||||
# Handle 'latest' keyword for to_timestamp
|
||||
if to_timestamp == 'latest':
|
||||
to_timestamp = history_keys[-1]
|
||||
|
||||
# Handle 'previous' keyword for from_timestamp (second-most-recent)
|
||||
if from_timestamp == 'previous':
|
||||
if len(history_keys) < 2:
|
||||
abort(404, message=f"Not enough history entries. Need at least 2 snapshots for 'previous'")
|
||||
from_timestamp = history_keys[-2]
|
||||
|
||||
# Validate timestamps exist
|
||||
if from_timestamp not in watch.history:
|
||||
abort(404, message=f"From timestamp {from_timestamp} not found in watch history")
|
||||
if to_timestamp not in watch.history:
|
||||
abort(404, message=f"To timestamp {to_timestamp} not found in watch history")
|
||||
|
||||
# Get the format parameter (default to 'text')
|
||||
output_format = request.args.get('format', 'text').lower()
|
||||
|
||||
# Validate format
|
||||
if output_format not in valid_notification_formats.keys():
|
||||
abort(400, message=f"Invalid format. Must be one of: {', '.join(valid_notification_formats.keys())}")
|
||||
|
||||
# Get the word_diff parameter (default to False - line-level mode)
|
||||
word_diff = strtobool(request.args.get('word_diff', 'false'))
|
||||
|
||||
# Get the no_markup parameter (default to False)
|
||||
no_markup = strtobool(request.args.get('no_markup', 'false'))
|
||||
|
||||
# Retrieve snapshot contents
|
||||
from_version_file_contents = watch.get_history_snapshot(from_timestamp)
|
||||
to_version_file_contents = watch.get_history_snapshot(to_timestamp)
|
||||
|
||||
# Get diff preferences (using defaults similar to the existing code)
|
||||
diff_prefs = {
|
||||
'diff_ignoreWhitespace': False,
|
||||
'diff_changesOnly': True
|
||||
}
|
||||
|
||||
# Generate the diff
|
||||
content = diff.render_diff(
|
||||
previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
ignore_junk=diff_prefs.get('diff_ignoreWhitespace'),
|
||||
include_equal=not diff_prefs.get('diff_changesOnly'),
|
||||
word_diff=word_diff,
|
||||
)
|
||||
|
||||
# Skip formatting if no_markup is set
|
||||
if no_markup:
|
||||
mimetype = "text/plain"
|
||||
else:
|
||||
# Apply formatting based on the requested format
|
||||
if output_format == 'htmlcolor':
|
||||
from changedetectionio.notification.handler import apply_html_color_to_body
|
||||
content = apply_html_color_to_body(n_body=content)
|
||||
mimetype = "text/html"
|
||||
else:
|
||||
# Apply service tweaks for text/html formats
|
||||
# Pass empty URL and title as they're not used for the placeholder replacement we need
|
||||
_, content, _ = apply_service_tweaks(
|
||||
url='',
|
||||
n_body=content,
|
||||
n_title='',
|
||||
requested_output_format=output_format
|
||||
)
|
||||
mimetype = "text/html" if output_format == 'html' else "text/plain"
|
||||
|
||||
if 'html' in output_format:
|
||||
content = newline_re.sub('<br>\r\n', content)
|
||||
|
||||
response = make_response(content, 200)
|
||||
response.mimetype = mimetype
|
||||
return response
|
||||
|
||||
|
||||
class WatchFavicon(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
@@ -226,9 +394,7 @@ class CreateWatch(Resource):
|
||||
json_data = request.get_json()
|
||||
url = json_data['url'].strip()
|
||||
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
||||
if not validators.url(url, simple_host=allow_simplehost):
|
||||
if not is_safe_valid_url(url):
|
||||
return "Invalid or unsupported URL", 400
|
||||
|
||||
if json_data.get('proxy'):
|
||||
|
||||
@@ -41,7 +41,7 @@ def get_openapi_spec():
|
||||
# Possibly for pip3 packages
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r') as f:
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
||||
return _openapi_spec
|
||||
@@ -51,6 +51,7 @@ def validate_openapi_request(operation_id):
|
||||
def decorator(f):
|
||||
@functools.wraps(f)
|
||||
def wrapper(*args, **kwargs):
|
||||
from werkzeug.exceptions import BadRequest
|
||||
try:
|
||||
# Skip OpenAPI validation for GET requests since they don't have request bodies
|
||||
if request.method.upper() != 'GET':
|
||||
@@ -61,7 +62,6 @@ def validate_openapi_request(operation_id):
|
||||
openapi_request = FlaskOpenAPIRequest(request)
|
||||
result = spec.unmarshal_request(openapi_request)
|
||||
if result.errors:
|
||||
from werkzeug.exceptions import BadRequest
|
||||
error_details = []
|
||||
for error in result.errors:
|
||||
error_details.append(str(error))
|
||||
@@ -78,7 +78,7 @@ def validate_openapi_request(operation_id):
|
||||
return decorator
|
||||
|
||||
# Import all API resources
|
||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon
|
||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, WatchFavicon
|
||||
from .Tags import Tags, Tag
|
||||
from .Import import Import
|
||||
from .SystemInfo import SystemInfo
|
||||
|
||||
@@ -96,7 +96,10 @@ def build_watch_json_schema(d):
|
||||
"enum": ["html_requests", "html_webdriver"]
|
||||
})
|
||||
|
||||
|
||||
schema['properties']['processor'] = {"anyOf": [
|
||||
{"type": "string", "enum": ["restock_diff", "text_json_diff"]},
|
||||
{"type": "null"}
|
||||
]}
|
||||
|
||||
# All headers must be key/value type dict
|
||||
schema['properties']['headers'] = {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from blinker import signal
|
||||
|
||||
from .processors.exceptions import ProcessorException
|
||||
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
|
||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
|
||||
@@ -40,13 +42,13 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
try:
|
||||
# Use native janus async interface - no threads needed!
|
||||
queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)
|
||||
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No jobs available, continue loop
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")
|
||||
|
||||
|
||||
# Log queue health for debugging
|
||||
try:
|
||||
queue_size = q.qsize()
|
||||
@@ -54,15 +56,28 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
|
||||
except Exception as health_e:
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")
|
||||
|
||||
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
|
||||
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
# Mark this UUID as being processed
|
||||
|
||||
# RACE CONDITION FIX: Check if this UUID is already being processed by another worker
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
if worker_handler.is_watch_running(uuid):
|
||||
logger.trace(f"Worker {worker_id} skipping UUID {uuid} - already being processed, re-queuing for later")
|
||||
# Re-queue with MUCH lower priority (higher number = processed later)
|
||||
# This prevents tight loop where high-priority item keeps getting picked immediately
|
||||
deferred_priority = max(1000, queued_item_data.priority * 10)
|
||||
deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
|
||||
worker_handler.queue_item_async_safe(q, deferred_item, silent=True)
|
||||
await asyncio.sleep(0.1) # Brief pause to avoid tight loop
|
||||
continue
|
||||
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
# Mark this UUID as being processed
|
||||
worker_handler.set_uuid_processing(uuid, processing=True)
|
||||
|
||||
try:
|
||||
@@ -87,9 +102,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
processor = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Init a new 'difference_detection_processor'
|
||||
processor_module_name = f"changedetectionio.processors.{processor}.processor"
|
||||
try:
|
||||
processor_module = importlib.import_module(processor_module_name)
|
||||
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
|
||||
except ModuleNotFoundError as e:
|
||||
print(f"Processor module '{processor}' not found.")
|
||||
raise e
|
||||
@@ -97,6 +111,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid)
|
||||
|
||||
update_signal = signal('watch_small_status_comment')
|
||||
update_signal.send(watch_uuid=uuid, status="Fetching page..")
|
||||
|
||||
# All fetchers are now async, so call directly
|
||||
await update_handler.call_browser()
|
||||
|
||||
@@ -309,6 +326,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
logger.debug(f"Processing watch UUID: {uuid} - xpath_data length returned {len(update_handler.xpath_data) if update_handler.xpath_data else 'empty.'}")
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
@@ -326,7 +344,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
fetch_start_time += 1
|
||||
await asyncio.sleep(1)
|
||||
|
||||
watch.save_history_text(contents=contents,
|
||||
watch.save_history_blob(contents=contents,
|
||||
timestamp=int(fetch_start_time),
|
||||
snapshot_id=update_obj.get('previous_md5', 'none'))
|
||||
|
||||
@@ -353,12 +371,15 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
count = watch.get('check_count', 0) + 1
|
||||
|
||||
# Always record page title (used in notifications, and can change even when the content is the same)
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
if page_title:
|
||||
page_title = page_title.strip()[:2000]
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
|
||||
# Record server header
|
||||
try:
|
||||
@@ -391,6 +412,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"})
|
||||
|
||||
finally:
|
||||
|
||||
try:
|
||||
await update_handler.fetcher.quit(watch=watch)
|
||||
except Exception as e:
|
||||
logger.error(f"Exception while cleaning/quit after calling browser: {e}")
|
||||
|
||||
# Always cleanup - this runs whether there was an exception or not
|
||||
if uuid:
|
||||
try:
|
||||
@@ -424,6 +451,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
# 3. GC can't collect the object anyway (still referenced by datastore)
|
||||
# 4. It would just cause confusion
|
||||
|
||||
# Force garbage collection after cleanup
|
||||
import gc
|
||||
gc.collect()
|
||||
|
||||
logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
|
||||
|
||||
@@ -3,31 +3,31 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h4>Backups</h4>
|
||||
<h4>{{ _('Backups') }}</h4>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<strong>A backup is running!</strong>
|
||||
<strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
Here you can download and request a new backup, when a backup is completed you will see it listed below.
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} Mb</li>
|
||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>No backups found.</strong>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">Create backup</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">Remove backups</a>
|
||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -439,7 +439,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
logger.warning("Attempted to get current state after cleanup")
|
||||
return (None, None)
|
||||
|
||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding="utf-8")
|
||||
|
||||
now = time.time()
|
||||
await self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#url-list">URL List</a></li>
|
||||
<li class="tab"><a href="#distill-io">Distill.io</a></li>
|
||||
<li class="tab"><a href="#xlsx">.XLSX & Wachete</a></li>
|
||||
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
|
||||
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
|
||||
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -17,11 +17,10 @@
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<div class="tab-pane-inner" id="url-list">
|
||||
<div class="pure-control-group">
|
||||
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
|
||||
(,):
|
||||
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
|
||||
<br>
|
||||
<p><strong>Example: </strong><code>https://example.com tag1, tag2, last tag</code></p>
|
||||
URLs which do not pass validation will stay in the textarea.
|
||||
<p><strong>{{ _('Example:') }} </strong><code>https://example.com tag1, tag2, last tag</code></p>
|
||||
{{ _('URLs which do not pass validation will stay in the textarea.') }}
|
||||
</div>
|
||||
{{ render_field(form.processor, class="processor") }}
|
||||
|
||||
@@ -42,12 +41,12 @@
|
||||
|
||||
|
||||
<div class="pure-control-group">
|
||||
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
|
||||
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
|
||||
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
|
||||
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
|
||||
<br>
|
||||
<p>
|
||||
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
|
||||
Be sure to set your default fetcher to Chrome if required.<br>
|
||||
{{ _('How to export?') }} <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
|
||||
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -89,32 +88,32 @@
|
||||
</fieldset>
|
||||
<div class="pure-control-group">
|
||||
<span class="pure-form-message-inline">
|
||||
Table of custom column and data types mapping for the <strong>Custom mapping</strong> File mapping type.
|
||||
{{ _('Table of custom column and data types mapping for the') }} <strong>{{ _('Custom mapping') }}</strong> {{ _('File mapping type.') }}
|
||||
</span>
|
||||
<table style="border: 1px solid #aaa; padding: 0.5rem; border-radius: 4px;">
|
||||
<tr>
|
||||
<td><strong>Column #</strong></td>
|
||||
<td><strong>{{ _('Column #') }}</strong></td>
|
||||
{% for n in range(4) %}
|
||||
<td><input type="number" name="custom_xlsx[col_{{n}}]" style="width: 4rem;" min="1"></td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Type</strong></td>
|
||||
<td><strong>{{ _('Type') }}</strong></td>
|
||||
{% for n in range(4) %}
|
||||
<td><select name="custom_xlsx[col_type_{{n}}]">
|
||||
<option value="" style="color: #aaa"> -- none --</option>
|
||||
<option value="url">URL</option>
|
||||
<option value="title">Title</option>
|
||||
<option value="include_filters">CSS/xPath filter</option>
|
||||
<option value="tag">Group / Tag name(s)</option>
|
||||
<option value="interval_minutes">Recheck time (minutes)</option>
|
||||
<option value="" style="color: #aaa"> -- {{ _('none') }} --</option>
|
||||
<option value="url">{{ _('URL') }}</option>
|
||||
<option value="title">{{ _('Title') }}</option>
|
||||
<option value="include_filters">{{ _('CSS/xPath filter') }}</option>
|
||||
<option value="tag">{{ _('Group / Tag name(s)') }}</option>
|
||||
<option value="interval_minutes">{{ _('Recheck time (minutes)') }}</option>
|
||||
</select></td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -1 +1,27 @@
|
||||
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]
|
||||
from copy import deepcopy
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
from changedetectionio.notification import valid_notification_formats
|
||||
|
||||
RSS_CONTENT_FORMAT_DEFAULT = 'text'
|
||||
|
||||
# Some stuff not related
|
||||
RSS_FORMAT_TYPES = deepcopy(valid_notification_formats)
|
||||
if RSS_FORMAT_TYPES.get('markdown'):
|
||||
del RSS_FORMAT_TYPES['markdown']
|
||||
|
||||
if RSS_FORMAT_TYPES.get(USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH):
|
||||
del RSS_FORMAT_TYPES[USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH]
|
||||
|
||||
if not RSS_FORMAT_TYPES.get(RSS_CONTENT_FORMAT_DEFAULT):
|
||||
logger.critical(f"RSS_CONTENT_FORMAT_DEFAULT not in the acceptable list {RSS_CONTENT_FORMAT_DEFAULT}")
|
||||
|
||||
RSS_TEMPLATE_TYPE_OPTIONS = {'system_default': 'System default', 'notification_body': 'Notification body'}
|
||||
|
||||
# @note: We use <pre> because nearly all RSS readers render only HTML (Thunderbird for example cant do just plaintext)
|
||||
RSS_TEMPLATE_PLAINTEXT_DEFAULT = "<pre>{{watch_label}} had a change.\n\n{{diff}}\n</pre>"
|
||||
|
||||
# @todo add some [edit]/[history]/[goto] etc links
|
||||
# @todo need {{watch_edit_link}} + delete + history link token
|
||||
RSS_TEMPLATE_HTML_DEFAULT = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_label}}</a></h4>\n<p>{{diff}}</p>\n</body></html>\n"
|
||||
|
||||
156
changedetectionio/blueprint/rss/_util.py
Normal file
156
changedetectionio/blueprint/rss/_util.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""
|
||||
Utility functions for RSS feed generation.
|
||||
"""
|
||||
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
from changedetectionio.notification_service import NotificationContextData, _check_cascading_vars
|
||||
from loguru import logger
|
||||
import datetime
|
||||
import pytz
|
||||
import re
|
||||
|
||||
|
||||
BAD_CHARS_REGEX = r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
|
||||
|
||||
|
||||
def scan_invalid_chars_in_rss(content):
|
||||
"""
|
||||
Scan for invalid characters in RSS content.
|
||||
Returns True if invalid characters are found.
|
||||
"""
|
||||
for match in re.finditer(BAD_CHARS_REGEX, content):
|
||||
i = match.start()
|
||||
bad_char = content[i]
|
||||
hex_value = f"0x{ord(bad_char):02x}"
|
||||
# Grab context
|
||||
start = max(0, i - 20)
|
||||
end = min(len(content), i + 21)
|
||||
context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
|
||||
logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
|
||||
# First match is enough
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def clean_entry_content(content):
|
||||
"""
|
||||
Remove invalid characters from RSS content.
|
||||
"""
|
||||
cleaned = re.sub(BAD_CHARS_REGEX, '', content)
|
||||
return cleaned
|
||||
|
||||
|
||||
def generate_watch_guid(watch, timestamp):
|
||||
"""
|
||||
Generate a unique GUID for a watch RSS entry.
|
||||
|
||||
Args:
|
||||
watch: The watch object
|
||||
timestamp: The timestamp of the specific change this entry represents
|
||||
"""
|
||||
return f"{watch['uuid']}/{timestamp}"
|
||||
|
||||
|
||||
def validate_rss_token(datastore, request):
|
||||
"""
|
||||
Validate the RSS access token from the request.
|
||||
|
||||
Returns:
|
||||
tuple: (is_valid, error_response) where error_response is None if valid
|
||||
"""
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
|
||||
rss_url_token = request.args.get('token')
|
||||
|
||||
if rss_url_token != app_rss_token:
|
||||
return False, ("Access denied, bad token", 403)
|
||||
|
||||
return True, None
|
||||
|
||||
|
||||
def get_rss_template(datastore, watch, rss_content_format, default_html, default_plaintext):
|
||||
"""Get the appropriate template for RSS content."""
|
||||
if datastore.data['settings']['application'].get('rss_template_type') == 'notification_body':
|
||||
return _check_cascading_vars(datastore=datastore, var_name='notification_body', watch=watch)
|
||||
|
||||
override = datastore.data['settings']['application'].get('rss_template_override')
|
||||
if override and override.strip():
|
||||
return override
|
||||
elif 'text' in rss_content_format:
|
||||
return default_plaintext
|
||||
else:
|
||||
return default_html
|
||||
|
||||
|
||||
def get_watch_label(datastore, watch):
|
||||
"""Get the label for a watch based on settings."""
|
||||
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
|
||||
return watch.label
|
||||
else:
|
||||
return watch.get('url')
|
||||
|
||||
|
||||
def add_watch_categories(fe, watch, datastore):
|
||||
"""Add category tags to a feed entry based on watch tags."""
|
||||
for tag_uuid in watch.get('tags', []):
|
||||
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
|
||||
if tag and tag.get('title'):
|
||||
fe.category(term=tag.get('title'))
|
||||
|
||||
|
||||
def build_notification_context(watch, timestamp_from, timestamp_to, watch_label,
|
||||
n_body_template, rss_content_format):
|
||||
"""Build the notification context object."""
|
||||
return NotificationContextData(initial_data={
|
||||
'notification_urls': ['null://just-sending-a-null-test-for-the-render-in-RSS'],
|
||||
'notification_body': n_body_template,
|
||||
'timestamp_to': timestamp_to,
|
||||
'timestamp_from': timestamp_from,
|
||||
'watch_label': watch_label,
|
||||
'notification_format': rss_content_format
|
||||
})
|
||||
|
||||
|
||||
def render_notification(n_object, notification_service, watch, datastore,
|
||||
date_index_from=None, date_index_to=None):
|
||||
"""Process and render the notification content."""
|
||||
kwargs = {'n_object': n_object, 'watch': watch}
|
||||
|
||||
if date_index_from is not None and date_index_to is not None:
|
||||
kwargs['date_index_from'] = date_index_from
|
||||
kwargs['date_index_to'] = date_index_to
|
||||
|
||||
n_object = notification_service.queue_notification_for_watch(**kwargs)
|
||||
n_object['watch_mime_type'] = None
|
||||
|
||||
res = process_notification(n_object=n_object, datastore=datastore)
|
||||
return res[0]
|
||||
|
||||
|
||||
def populate_feed_entry(fe, watch, content, guid, timestamp, link=None, title_suffix=None):
|
||||
"""Populate a feed entry with content and metadata."""
|
||||
watch_label = watch.get('url') # Already determined by caller
|
||||
|
||||
# Set link
|
||||
if link:
|
||||
fe.link(link=link)
|
||||
|
||||
# Set title
|
||||
if title_suffix:
|
||||
fe.title(title=f"{watch_label} - {title_suffix}")
|
||||
else:
|
||||
fe.title(title=watch_label)
|
||||
|
||||
# Clean and set content
|
||||
if scan_invalid_chars_in_rss(content):
|
||||
content = clean_entry_content(content)
|
||||
fe.content(content=content, type='CDATA')
|
||||
|
||||
# Set GUID
|
||||
fe.guid(guid, permalink=False)
|
||||
|
||||
# Set pubDate using the timestamp of this specific change
|
||||
dt = datetime.datetime.fromtimestamp(int(timestamp))
|
||||
dt = dt.replace(tzinfo=pytz.UTC)
|
||||
fe.pubDate(dt)
|
||||
|
||||
@@ -1,155 +1,26 @@
|
||||
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from changedetectionio.notification.handler import apply_service_tweaks
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from feedgen.feed import FeedGenerator
|
||||
from flask import Blueprint, make_response, request, url_for, redirect
|
||||
from loguru import logger
|
||||
import datetime
|
||||
import pytz
|
||||
import re
|
||||
import time
|
||||
from flask import Blueprint
|
||||
|
||||
|
||||
BAD_CHARS_REGEX=r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
|
||||
|
||||
# Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc)
|
||||
def scan_invalid_chars_in_rss(content):
|
||||
for match in re.finditer(BAD_CHARS_REGEX, content):
|
||||
i = match.start()
|
||||
bad_char = content[i]
|
||||
hex_value = f"0x{ord(bad_char):02x}"
|
||||
# Grab context
|
||||
start = max(0, i - 20)
|
||||
end = min(len(content), i + 21)
|
||||
context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
|
||||
logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
|
||||
# First match is enough
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def clean_entry_content(content):
|
||||
cleaned = re.sub(BAD_CHARS_REGEX, '', content)
|
||||
return cleaned
|
||||
from . import tag as tag_routes
|
||||
from . import main_feed
|
||||
from . import single_watch
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
"""
|
||||
Construct and configure the RSS blueprint with all routes.
|
||||
|
||||
Args:
|
||||
datastore: The ChangeDetectionStore instance
|
||||
|
||||
Returns:
|
||||
The configured Flask blueprint
|
||||
"""
|
||||
rss_blueprint = Blueprint('rss', __name__)
|
||||
|
||||
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
|
||||
# to some earlier blueprint rerouting work, it should goto feed.
|
||||
@rss_blueprint.route("/", methods=['GET'])
|
||||
def extraslash():
|
||||
return redirect(url_for('rss.feed'))
|
||||
|
||||
# Import the login decorator if needed
|
||||
# from changedetectionio.auth_decorator import login_optionally_required
|
||||
@rss_blueprint.route("", methods=['GET'])
|
||||
def feed():
|
||||
now = time.time()
|
||||
# Always requires token set
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
|
||||
rss_url_token = request.args.get('token')
|
||||
if rss_url_token != app_rss_token:
|
||||
return "Access denied, bad token", 403
|
||||
|
||||
from changedetectionio import diff
|
||||
limit_tag = request.args.get('tag', '').lower().strip()
|
||||
# Be sure limit_tag is a uuid
|
||||
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
||||
if limit_tag == tag.get('title', '').lower().strip():
|
||||
limit_tag = uuid
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
sorted_watches = []
|
||||
|
||||
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
# @todo tag notification_muted skip also (improve Watch model)
|
||||
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
|
||||
continue
|
||||
if limit_tag and not limit_tag in watch['tags']:
|
||||
continue
|
||||
watch['uuid'] = uuid
|
||||
sorted_watches.append(watch)
|
||||
|
||||
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
|
||||
|
||||
fg = FeedGenerator()
|
||||
fg.title('changedetection.io')
|
||||
fg.description('Feed description')
|
||||
fg.link(href='https://changedetection.io')
|
||||
|
||||
html_colour_enable = False
|
||||
if datastore.data['settings']['application'].get('rss_content_format') == 'html':
|
||||
html_colour_enable = True
|
||||
|
||||
for watch in sorted_watches:
|
||||
|
||||
dates = list(watch.history.keys())
|
||||
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
|
||||
if len(dates) < 2:
|
||||
continue
|
||||
|
||||
if not watch.viewed:
|
||||
# Re #239 - GUID needs to be individual for each event
|
||||
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
|
||||
guid = "{}/{}".format(watch['uuid'], watch.last_changed)
|
||||
fe = fg.add_entry()
|
||||
|
||||
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
|
||||
# Description is the page you watch, link takes you to the diff JS UI page
|
||||
# Dict val base_url will get overriden with the env var if it is set.
|
||||
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
|
||||
# @todo fix
|
||||
|
||||
# Because we are called via whatever web server, flask should figure out the right path (
|
||||
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||
|
||||
fe.link(link=diff_link)
|
||||
|
||||
# Same logic as watch-overview.html
|
||||
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
|
||||
watch_label = watch.label
|
||||
else:
|
||||
watch_label = watch.get('url')
|
||||
|
||||
fe.title(title=watch_label)
|
||||
try:
|
||||
|
||||
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
|
||||
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
|
||||
include_equal=False,
|
||||
line_feed_sep="<br>"
|
||||
)
|
||||
|
||||
|
||||
requested_output_format = 'htmlcolor' if html_colour_enable else 'html'
|
||||
html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format)
|
||||
|
||||
except FileNotFoundError as e:
|
||||
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
|
||||
|
||||
# @todo Make this configurable and also consider html-colored markup
|
||||
# @todo User could decide if <link> goes to the diff page, or to the watch link
|
||||
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
|
||||
|
||||
content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
|
||||
|
||||
# Out of range chars could also break feedgen
|
||||
if scan_invalid_chars_in_rss(content):
|
||||
content = clean_entry_content(content)
|
||||
|
||||
fe.content(content=content, type='CDATA')
|
||||
fe.guid(guid, permalink=False)
|
||||
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
|
||||
dt = dt.replace(tzinfo=pytz.UTC)
|
||||
fe.pubDate(dt)
|
||||
|
||||
response = make_response(fg.rss_str())
|
||||
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
|
||||
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
|
||||
return response
|
||||
# Register all route modules
|
||||
main_feed.construct_main_feed_routes(rss_blueprint, datastore)
|
||||
single_watch.construct_single_watch_routes(rss_blueprint, datastore)
|
||||
tag_routes.construct_tag_routes(rss_blueprint, datastore)
|
||||
|
||||
return rss_blueprint
|
||||
105
changedetectionio/blueprint/rss/main_feed.py
Normal file
105
changedetectionio/blueprint/rss/main_feed.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from flask import make_response, request, url_for, redirect
|
||||
|
||||
|
||||
|
||||
def construct_main_feed_routes(rss_blueprint, datastore):
|
||||
"""
|
||||
Construct the main RSS feed routes.
|
||||
|
||||
Args:
|
||||
rss_blueprint: The Flask blueprint to add routes to
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
|
||||
# to some earlier blueprint rerouting work, it should goto feed.
|
||||
@rss_blueprint.route("/", methods=['GET'])
|
||||
def extraslash():
|
||||
return redirect(url_for('rss.feed'))
|
||||
|
||||
# Import the login decorator if needed
|
||||
# from changedetectionio.auth_decorator import login_optionally_required
|
||||
@rss_blueprint.route("", methods=['GET'])
|
||||
def feed():
|
||||
from feedgen.feed import FeedGenerator
|
||||
from loguru import logger
|
||||
import time
|
||||
|
||||
from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
|
||||
from ._util import (validate_rss_token, generate_watch_guid, get_rss_template,
|
||||
get_watch_label, build_notification_context, render_notification,
|
||||
populate_feed_entry, add_watch_categories)
|
||||
from ...notification_service import NotificationService
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Validate token
|
||||
is_valid, error = validate_rss_token(datastore, request)
|
||||
if not is_valid:
|
||||
return error
|
||||
|
||||
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
|
||||
|
||||
limit_tag = request.args.get('tag', '').lower().strip()
|
||||
# Be sure limit_tag is a uuid
|
||||
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
||||
if limit_tag == tag.get('title', '').lower().strip():
|
||||
limit_tag = uuid
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
sorted_watches = []
|
||||
|
||||
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
# @todo tag notification_muted skip also (improve Watch model)
|
||||
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
|
||||
continue
|
||||
if limit_tag and not limit_tag in watch['tags']:
|
||||
continue
|
||||
sorted_watches.append(watch)
|
||||
|
||||
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
|
||||
|
||||
fg = FeedGenerator()
|
||||
fg.title('changedetection.io')
|
||||
fg.description('Feed description')
|
||||
fg.link(href='https://changedetection.io')
|
||||
notification_service = NotificationService(datastore=datastore, notification_q=False)
|
||||
|
||||
for watch in sorted_watches:
|
||||
|
||||
dates = list(watch.history.keys())
|
||||
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
|
||||
if len(dates) < 2:
|
||||
continue
|
||||
|
||||
if not watch.viewed:
|
||||
# Re #239 - GUID needs to be individual for each event
|
||||
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
|
||||
watch_label = get_watch_label(datastore, watch)
|
||||
timestamp_to = dates[-1]
|
||||
timestamp_from = dates[-2]
|
||||
guid = generate_watch_guid(watch, timestamp_to)
|
||||
# Because we are called via whatever web server, flask should figure out the right path
|
||||
diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||
|
||||
# Get template and build notification context
|
||||
n_body_template = get_rss_template(datastore, watch, rss_content_format,
|
||||
RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
|
||||
|
||||
n_object = build_notification_context(watch, timestamp_from, timestamp_to,
|
||||
watch_label, n_body_template, rss_content_format)
|
||||
|
||||
# Render notification
|
||||
res = render_notification(n_object, notification_service, watch, datastore)
|
||||
|
||||
# Create and populate feed entry
|
||||
fe = fg.add_entry()
|
||||
populate_feed_entry(fe, watch, res['body'], guid, timestamp_to, link=diff_link)
|
||||
fe.title(title=watch_label) # Override title to not include suffix
|
||||
add_watch_categories(fe, watch, datastore)
|
||||
|
||||
response = make_response(fg.rss_str())
|
||||
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
|
||||
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
|
||||
return response
|
||||
115
changedetectionio/blueprint/rss/single_watch.py
Normal file
115
changedetectionio/blueprint/rss/single_watch.py
Normal file
@@ -0,0 +1,115 @@
|
||||
|
||||
|
||||
def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
"""
|
||||
Construct RSS feed routes for single watches.
|
||||
|
||||
Args:
|
||||
rss_blueprint: The Flask blueprint to add routes to
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
|
||||
def rss_single_watch(uuid):
|
||||
import time
|
||||
|
||||
from flask import make_response, request
|
||||
from feedgen.feed import FeedGenerator
|
||||
from loguru import logger
|
||||
|
||||
from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
|
||||
from ._util import (validate_rss_token, get_rss_template, get_watch_label,
|
||||
build_notification_context, render_notification,
|
||||
populate_feed_entry, add_watch_categories)
|
||||
from ...notification_service import NotificationService
|
||||
|
||||
"""
|
||||
Display the most recent changes for a single watch as RSS feed.
|
||||
Returns RSS XML with multiple entries showing diffs between consecutive snapshots.
|
||||
The number of entries is controlled by the rss_diff_length setting.
|
||||
"""
|
||||
now = time.time()
|
||||
|
||||
# Validate token
|
||||
is_valid, error = validate_rss_token(datastore, request)
|
||||
if not is_valid:
|
||||
return error
|
||||
|
||||
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
|
||||
|
||||
# Get the watch by UUID
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return f"Watch with UUID {uuid} not found", 404
|
||||
|
||||
# Check if watch has at least 2 history snapshots
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
|
||||
|
||||
# Add uuid to watch for proper functioning
|
||||
watch['uuid'] = uuid
|
||||
|
||||
# Get the number of diffs to include (default: 5)
|
||||
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
|
||||
|
||||
# Calculate how many diffs we can actually show (limited by available history)
|
||||
# We need at least 2 snapshots to create 1 diff
|
||||
max_possible_diffs = len(dates) - 1
|
||||
num_diffs = min(rss_diff_length, max_possible_diffs) if rss_diff_length > 0 else max_possible_diffs
|
||||
|
||||
# Create RSS feed
|
||||
fg = FeedGenerator()
|
||||
|
||||
# Set title: use "label (url)" if label differs from url, otherwise just url
|
||||
watch_url = watch.get('url', '')
|
||||
watch_label = get_watch_label(datastore, watch)
|
||||
|
||||
if watch_label != watch_url:
|
||||
feed_title = f'changedetection.io - {watch_label} ({watch_url})'
|
||||
else:
|
||||
feed_title = f'changedetection.io - {watch_url}'
|
||||
|
||||
fg.title(feed_title)
|
||||
fg.description('Changes')
|
||||
fg.link(href='https://changedetection.io')
|
||||
|
||||
# Loop through history and create RSS entries for each diff
|
||||
# Add entries in reverse order because feedgen reverses them
|
||||
# This way, the newest change appears first in the final RSS
|
||||
|
||||
notification_service = NotificationService(datastore=datastore, notification_q=False)
|
||||
for i in range(num_diffs - 1, -1, -1):
|
||||
# Calculate indices for this diff (working backwards from newest)
|
||||
# i=0: compare dates[-2] to dates[-1] (most recent change)
|
||||
# i=1: compare dates[-3] to dates[-2] (previous change)
|
||||
# etc.
|
||||
date_index_to = -(i + 1)
|
||||
date_index_from = -(i + 2)
|
||||
timestamp_to = dates[date_index_to]
|
||||
timestamp_from = dates[date_index_from]
|
||||
|
||||
# Get template and build notification context
|
||||
n_body_template = get_rss_template(datastore, watch, rss_content_format,
|
||||
RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
|
||||
|
||||
n_object = build_notification_context(watch, timestamp_from, timestamp_to,
|
||||
watch_label, n_body_template, rss_content_format)
|
||||
|
||||
# Render notification with date indices
|
||||
res = render_notification(n_object, notification_service, watch, datastore,
|
||||
date_index_from, date_index_to)
|
||||
|
||||
# Create and populate feed entry
|
||||
guid = f"{watch['uuid']}/{timestamp_to}"
|
||||
fe = fg.add_entry()
|
||||
title_suffix = f"Change @ {res['original_context']['change_datetime']}"
|
||||
populate_feed_entry(fe, watch, res.get('body', ''), guid, timestamp_to,
|
||||
link={'href': watch.get('url')}, title_suffix=title_suffix)
|
||||
add_watch_categories(fe, watch, datastore)
|
||||
|
||||
response = make_response(fg.rss_str())
|
||||
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
|
||||
logger.debug(f"RSS Single watch built in {time.time()-now:.2f}s")
|
||||
|
||||
return response
|
||||
98
changedetectionio/blueprint/rss/tag.py
Normal file
98
changedetectionio/blueprint/rss/tag.py
Normal file
@@ -0,0 +1,98 @@
|
||||
def construct_tag_routes(rss_blueprint, datastore):
|
||||
"""
|
||||
Construct RSS feed routes for tags.
|
||||
|
||||
Args:
|
||||
rss_blueprint: The Flask blueprint to add routes to
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||
def rss_tag_feed(tag_uuid):
|
||||
|
||||
from flask import make_response, request, url_for
|
||||
from feedgen.feed import FeedGenerator
|
||||
|
||||
from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
|
||||
from ._util import (validate_rss_token, generate_watch_guid, get_rss_template,
|
||||
get_watch_label, build_notification_context, render_notification,
|
||||
populate_feed_entry, add_watch_categories)
|
||||
from ...notification_service import NotificationService
|
||||
|
||||
"""
|
||||
Display an RSS feed for all unviewed watches that belong to a specific tag.
|
||||
Returns RSS XML with entries for each unviewed watch with sufficient history.
|
||||
"""
|
||||
# Validate token
|
||||
is_valid, error = validate_rss_token(datastore, request)
|
||||
if not is_valid:
|
||||
return error
|
||||
|
||||
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
|
||||
|
||||
# Verify tag exists
|
||||
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
|
||||
if not tag:
|
||||
return f"Tag with UUID {tag_uuid} not found", 404
|
||||
|
||||
tag_title = tag.get('title', 'Unknown Tag')
|
||||
|
||||
# Create RSS feed
|
||||
fg = FeedGenerator()
|
||||
fg.title(f'changedetection.io - {tag_title}')
|
||||
fg.description(f'Changes for watches tagged with {tag_title}')
|
||||
fg.link(href='https://changedetection.io')
|
||||
notification_service = NotificationService(datastore=datastore, notification_q=False)
|
||||
# Find all watches with this tag
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
#@todo This is wrong, it needs to sort by most recently changed and then limit it datastore.data['watching'].items().sorted(?)
|
||||
# So get all watches in this tag then sort
|
||||
|
||||
# Skip if watch doesn't have this tag
|
||||
if tag_uuid not in watch.get('tags', []):
|
||||
continue
|
||||
|
||||
# Skip muted watches if configured
|
||||
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
|
||||
continue
|
||||
|
||||
# Check if watch has at least 2 history snapshots
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
continue
|
||||
|
||||
# Only include unviewed watches
|
||||
if not watch.viewed:
|
||||
# Add uuid to watch for proper functioning
|
||||
watch['uuid'] = uuid
|
||||
|
||||
# Include a link to the diff page
|
||||
diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||
|
||||
# Get watch label
|
||||
watch_label = get_watch_label(datastore, watch)
|
||||
|
||||
# Get template and build notification context
|
||||
timestamp_to = dates[-1]
|
||||
timestamp_from = dates[-2]
|
||||
|
||||
# Generate GUID for this entry
|
||||
guid = generate_watch_guid(watch, timestamp_to)
|
||||
n_body_template = get_rss_template(datastore, watch, rss_content_format,
|
||||
RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
|
||||
|
||||
n_object = build_notification_context(watch, timestamp_from, timestamp_to,
|
||||
watch_label, n_body_template, rss_content_format)
|
||||
|
||||
# Render notification
|
||||
res = render_notification(n_object, notification_service, watch, datastore)
|
||||
|
||||
# Create and populate feed entry
|
||||
fe = fg.add_entry()
|
||||
title_suffix = f"Change @ {res['original_context']['change_datetime']}"
|
||||
populate_feed_entry(fe, watch, res['body'], guid, timestamp_to, link=diff_link, title_suffix=title_suffix)
|
||||
add_watch_categories(fe, watch, datastore)
|
||||
|
||||
response = make_response(fg.rss_str())
|
||||
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
|
||||
return response
|
||||
@@ -17,6 +17,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@login_optionally_required
|
||||
def settings_page():
|
||||
from changedetectionio import forms
|
||||
from changedetectionio.pluggy_interface import (
|
||||
get_plugin_settings_tabs,
|
||||
load_plugin_settings,
|
||||
save_plugin_settings
|
||||
)
|
||||
|
||||
|
||||
default = deepcopy(datastore.data['settings'])
|
||||
if datastore.proxy_list is not None:
|
||||
@@ -102,6 +108,20 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Also save plugin settings from the same form submission
|
||||
plugin_tabs_list = get_plugin_settings_tabs()
|
||||
for tab in plugin_tabs_list:
|
||||
plugin_id = tab['plugin_id']
|
||||
form_class = tab['form_class']
|
||||
|
||||
# Instantiate plugin form with POST data
|
||||
plugin_form = form_class(formdata=request.form)
|
||||
|
||||
# Save plugin settings (validation is optional for plugins)
|
||||
if plugin_form.data:
|
||||
save_plugin_settings(datastore.datastore_path, plugin_id, plugin_form.data)
|
||||
|
||||
flash("Settings updated.")
|
||||
|
||||
else:
|
||||
@@ -110,8 +130,30 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Convert to ISO 8601 format, all date/time relative events stored as UTC time
|
||||
utc_time = datetime.now(ZoneInfo("UTC")).isoformat()
|
||||
|
||||
# Get active plugins
|
||||
from changedetectionio.pluggy_interface import get_active_plugins
|
||||
import sys
|
||||
active_plugins = get_active_plugins()
|
||||
python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
|
||||
|
||||
# Get plugin settings tabs and instantiate forms
|
||||
plugin_tabs = get_plugin_settings_tabs()
|
||||
plugin_forms = {}
|
||||
|
||||
for tab in plugin_tabs:
|
||||
plugin_id = tab['plugin_id']
|
||||
form_class = tab['form_class']
|
||||
|
||||
# Load existing settings
|
||||
settings = load_plugin_settings(datastore.datastore_path, plugin_id)
|
||||
|
||||
# Instantiate the form with existing settings
|
||||
plugin_forms[plugin_id] = form_class(data=settings)
|
||||
|
||||
output = render_template("settings.html",
|
||||
active_plugins=active_plugins,
|
||||
api_key=datastore.data['settings']['application'].get('api_access_token'),
|
||||
python_version=python_version,
|
||||
available_timezones=sorted(available_timezones()),
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
|
||||
@@ -121,6 +163,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
|
||||
utc_time=utc_time,
|
||||
plugin_tabs=plugin_tabs,
|
||||
plugin_forms=plugin_forms,
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
<div class="edit-form">
|
||||
<div class="inner">
|
||||
|
||||
<h4 style="margin-top: 0px;">Notification debug log</h4>
|
||||
<h4 style="margin-top: 0px;">{{ _('Notification debug log') }}</h4>
|
||||
<div id="notification-error-log">
|
||||
<ul style="font-size: 80%; margin:0px; padding: 0 0 0 7px">
|
||||
{% for log in logs|reverse %}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
{% from '_common_fields.html' import render_common_settings_form, show_token_placeholders %}
|
||||
<script>
|
||||
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
|
||||
{% if emailprefix %}
|
||||
@@ -18,14 +18,21 @@
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
<li class="tab"><a href="#fetching">Fetching</a></li>
|
||||
<li class="tab"><a href="#filters">Global Filters</a></li>
|
||||
<li class="tab"><a href="#ui-options">UI Options</a></li>
|
||||
<li class="tab"><a href="#api">API</a></li>
|
||||
<li class="tab"><a href="#timedate">Time & Date</a></li>
|
||||
<li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li>
|
||||
<li class="tab" id=""><a href="#general">{{ _('General') }}</a></li>
|
||||
<li class="tab"><a href="#notifications">{{ _('Notifications') }}</a></li>
|
||||
<li class="tab"><a href="#fetching">{{ _('Fetching') }}</a></li>
|
||||
<li class="tab"><a href="#filters">{{ _('Global Filters') }}</a></li>
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
{% for tab in plugin_tabs %}
|
||||
<li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#info">{{ _('Info') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
@@ -35,18 +42,15 @@
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.time_between_check, class="time-check-widget") }}
|
||||
<span class="pure-form-message-inline">Default recheck time for all watches, current system minimum is <i>{{min_system_recheck_seconds}}</i> seconds (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Misc-system-settings#enviroment-variables">more info</a>).</span>
|
||||
|
||||
<span class="pure-form-message-inline">{{ _('Default recheck time for all watches, current system minimum is') }} <i>{{min_system_recheck_seconds}}</i> {{ _('seconds') }} (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Misc-system-settings#enviroment-variables">{{ _('more info') }}</a>).</span>
|
||||
<div id="time-between-check-schedule">
|
||||
<!-- Start Time and End Time -->
|
||||
<!-- Start Time and End Time {{ timezone_default_config }} -->
|
||||
<div id="limit-between-time">
|
||||
{{ render_time_schedule_form(form.requests, available_timezones, timezone_default_config) }}
|
||||
{{ render_time_schedule_form(form.requests, available_timezones, timezone_default_config) }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
|
||||
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
|
||||
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
|
||||
@@ -69,26 +73,13 @@
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
|
||||
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
|
||||
<span class="pure-form-message-inline">Allow access to the watch change history page when password is enabled (Good for sharing the diff page)
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
|
||||
</div>
|
||||
<div class="grey-form-border">
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_content_format) }}
|
||||
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
|
||||
<span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
@@ -131,6 +122,10 @@
|
||||
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
|
||||
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
|
||||
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.timeout) }}
|
||||
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
|
||||
@@ -230,7 +225,37 @@ nav
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="timedate">
|
||||
<div class="tab-pane-inner" id="rss">
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_diff_length) }}
|
||||
<span class="pure-form-message-inline">Maximum number of history snapshots to include in the watch specific RSS feed.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
|
||||
<span class="pure-form-message-inline">For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.</span>
|
||||
</div>
|
||||
<div class="pure-control-group grey-form-border">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_content_format) }}
|
||||
<span class="pure-form-message-inline">Does your reader support HTML? Set it here</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_template_type) }}
|
||||
<span class="pure-form-message-inline">'System default' for the same template for all items, or re-use your "Notification Body" as the template.</span>
|
||||
</div>
|
||||
<div>
|
||||
{{ render_field(form.application.form.rss_template_override) }}
|
||||
{{ show_token_placeholders(extra_notification_token_placeholder_info=extra_notification_token_placeholder_info, suffix="-rss") }}
|
||||
</div>
|
||||
</div>
|
||||
<br>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="timedate">
|
||||
<div class="pure-control-group">
|
||||
Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.
|
||||
</div>
|
||||
@@ -240,9 +265,7 @@ nav
|
||||
<p>
|
||||
{{ render_field(form.application.form.scheduler_timezone_default) }}
|
||||
<datalist id="timezones" style="display: none;">
|
||||
{% for tz_name in available_timezones %}
|
||||
<option value="{{ tz_name }}">{{ tz_name }}</option>
|
||||
{% endfor %}
|
||||
{%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%}
|
||||
</datalist>
|
||||
</p>
|
||||
</div>
|
||||
@@ -336,13 +359,51 @@ nav
|
||||
</p>
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
|
||||
</div>
|
||||
|
||||
</div>
|
||||
{% if plugin_tabs %}
|
||||
{% for tab in plugin_tabs %}
|
||||
<div class="tab-pane-inner" id="plugin-{{ tab.plugin_id }}">
|
||||
{% set plugin_form = plugin_forms[tab.plugin_id] %}
|
||||
{% if tab.template_path %}
|
||||
{# Plugin provides custom template - include it directly (no separate form) #}
|
||||
{% include tab.template_path with context %}
|
||||
{% else %}
|
||||
{# Default form rendering - fields only, no submit button #}
|
||||
<fieldset>
|
||||
{% for field in plugin_form %}
|
||||
{% if field.type != 'CSRFToken' and field.type != 'SubmitField' %}
|
||||
<div class="pure-control-group">
|
||||
{% if field.type == 'BooleanField' %}
|
||||
{{ render_checkbox_field(field) }}
|
||||
{% else %}
|
||||
{{ render_field(field) }}
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</fieldset>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<div class="tab-pane-inner" id="info">
|
||||
<p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
|
||||
<p><strong>{{ _('Plugins active:') }}</strong></p>
|
||||
{% if active_plugins %}
|
||||
<ul>
|
||||
{% for plugin in active_plugins %}
|
||||
<li><strong>{{ plugin.name }}</strong> - {{ plugin.description }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>{{ _('No plugins active') }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div id="actions">
|
||||
<div class="pure-control-group">
|
||||
{{ render_button(form.save_button) }}
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a>
|
||||
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">{{ _('Back') }}</a>
|
||||
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">{{ _('Clear Snapshot History') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@@ -21,9 +21,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
|
||||
|
||||
output = render_template("groups-overview.html",
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
available_tags=sorted_tags,
|
||||
form=add_form,
|
||||
tag_count=tag_count
|
||||
tag_count=tag_count,
|
||||
)
|
||||
|
||||
return output
|
||||
@@ -149,9 +150,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
included_content = template.render(**template_args)
|
||||
|
||||
output = render_template("edit-tag.html",
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
extra_form_content=included_content,
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
**template_args
|
||||
)
|
||||
|
||||
|
||||
@@ -24,12 +24,12 @@
|
||||
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
<li class="tab" id=""><a href="#general">{{ _('General') }}</a></li>
|
||||
<li class="tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
<li class="tab"><a href="#notifications">{{ _('Notifications') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -47,10 +47,10 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
<p>These settings are <strong><i>added</i></strong> to any existing watch configurations.</p>
|
||||
<p>{{ _('These settings are') }} <strong><i>{{ _('added') }}</i></strong> {{ _('to any existing watch configurations.') }}</p>
|
||||
{% include "edit/include_subtract.html" %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<h3>Text filtering</h3>
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
{% include "edit/text-options.html" %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -70,18 +70,18 @@
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_screenshot) }}
|
||||
<span class="pure-form-message-inline">
|
||||
<strong>Use with caution!</strong> This will easily fill up your email storage quota or flood other storages.
|
||||
<strong>{{ _('Use with caution!') }}</strong> {{ _('This will easily fill up your email storage quota or flood other storages.') }}
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="field-group" id="notification-field-group">
|
||||
{% if has_default_notification_urls %}
|
||||
<div class="inline-warning">
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="Look out!" title="Lookout!" >
|
||||
There are <a href="{{ url_for('settings.settings_page')}}#notifications">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications.
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Look out!') }}" title="{{ _('Lookout!') }}" >
|
||||
{{ _('There are') }} <a href="{{ url_for('settings.settings_page')}}#notifications">{{ _('system-wide notification URLs enabled') }}</a>, {{ _('this form will override notification settings for this watch only') }} ‐ {{ _('an empty Notification URL list here will still send notifications.') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">Use system defaults</a>
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">{{ _('Use system defaults') }}</a>
|
||||
|
||||
{{ render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
|
||||
</div>
|
||||
|
||||
@@ -2,22 +2,23 @@
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<fieldset>
|
||||
<legend>Add a new organisational tag</legend>
|
||||
<legend>{{ _('Add a new organisational tag') }}</legend>
|
||||
<div id="watch-add-wrapper-zone">
|
||||
<div>
|
||||
{{ render_simple_field(form.name, placeholder="Watch group / tag") }}
|
||||
{{ render_simple_field(form.name, placeholder=_("Watch group / tag")) }}
|
||||
</div>
|
||||
<div>
|
||||
{{ render_simple_field(form.save_button, title="Save" ) }}
|
||||
{{ render_simple_field(form.save_button, title=_("Save") ) }}
|
||||
</div>
|
||||
</div>
|
||||
<br>
|
||||
<div style="color: #fff;">Groups allows you to manage filters and notifications for multiple watches under a single organisational tag.</div>
|
||||
<div style="color: #fff;">{{ _('Groups allows you to manage filters and notifications for multiple watches under a single organisational tag.') }}</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
<!-- @todo maybe some overview matrix, 'tick' with which has notification, filter rules etc -->
|
||||
@@ -27,8 +28,8 @@
|
||||
<thead>
|
||||
<tr>
|
||||
<th></th>
|
||||
<th># Watches</th>
|
||||
<th>Tag / Label name</th>
|
||||
<th>{{ _('# Watches') }}</th>
|
||||
<th>{{ _('Tag / Label name') }}</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
@@ -38,7 +39,7 @@
|
||||
--->
|
||||
{% if not available_tags|length %}
|
||||
<tr>
|
||||
<td colspan="3">No website organisational tags/groups configured</td>
|
||||
<td colspan="3">{{ _('No website organisational tags/groups configured') }}</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% for uuid, tag in available_tags %}
|
||||
@@ -49,9 +50,24 @@
|
||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||
<td>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">Edit</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.delete', uuid=uuid) }}" title="Deletes and removes tag">Delete</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.unlink', uuid=uuid) }}" title="Keep the tag but unlink any watches">Unlink</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a class="pure-button button-error"
|
||||
href="{{ url_for('tags.delete', uuid=uuid) }}"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Delete Group?') }}"
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to delete group <strong>%(title)s</strong>?</p><p>This action cannot be undone.</p>', title=tag.title) }}"
|
||||
data-confirm-button="{{ _('Delete') }}"
|
||||
title="{{ _('Deletes and removes tag') }}">{{ _('Delete') }}</a>
|
||||
<a class="pure-button button-warning"
|
||||
href="{{ url_for('tags.unlink', uuid=uuid) }}"
|
||||
data-requires-confirm
|
||||
data-confirm-type="warning"
|
||||
data-confirm-title="{{ _('Unlink Group?') }}"
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to unlink all watches from group <strong>%(title)s</strong>?</p><p>The tag will be kept but watches will be removed from it.</p>', title=tag.title) }}"
|
||||
data-confirm-button="{{ _('Unlink') }}"
|
||||
title="{{ _('Keep the tag but unlink any watches') }}">{{ _('Unlink') }}</a>
|
||||
<a href="{{ url_for('rss.rss_tag_feed', tag_uuid=uuid, token=app_rss_token)}}"><img alt="{{ _('RSS Feed for this watch') }}" style="padding-left: 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
|
||||
@@ -6,6 +6,7 @@ from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
|
||||
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
|
||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||
from changedetectionio.blueprint.ui import diff, preview
|
||||
|
||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
from flask import request, flash
|
||||
@@ -76,14 +77,14 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
|
||||
elif (op == 'notification-default'):
|
||||
from changedetectionio.notification import (
|
||||
default_notification_format_for_watch
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
)
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_title'] = None
|
||||
datastore.data['watching'][uuid]['notification_body'] = None
|
||||
datastore.data['watching'][uuid]['notification_urls'] = []
|
||||
datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch
|
||||
datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches set to use default notification settings")
|
||||
|
||||
@@ -121,6 +122,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
|
||||
ui_blueprint.register_blueprint(views_blueprint)
|
||||
|
||||
# Register diff and preview blueprints
|
||||
diff_blueprint = diff.construct_blueprint(datastore)
|
||||
ui_blueprint.register_blueprint(diff_blueprint)
|
||||
|
||||
preview_blueprint = preview.construct_blueprint(datastore)
|
||||
ui_blueprint.register_blueprint(preview_blueprint)
|
||||
|
||||
# Import the login decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -249,7 +257,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
if i == 0:
|
||||
flash("No watches available to recheck.")
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
return redirect(url_for('watchlist.index', **({'tag': tag} if tag else {})))
|
||||
|
||||
@ui_blueprint.route("/form/checkbox-operations", methods=['POST'])
|
||||
@login_optionally_required
|
||||
|
||||
316
changedetectionio/blueprint/ui/diff.py
Normal file
316
changedetectionio/blueprint/ui/diff.py
Normal file
@@ -0,0 +1,316 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
|
||||
|
||||
import re
|
||||
import importlib
|
||||
from loguru import logger
|
||||
from markupsafe import Markup
|
||||
|
||||
from changedetectionio.diff import (
|
||||
REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
|
||||
REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
|
||||
ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
|
||||
CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
|
||||
CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
|
||||
)
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")
|
||||
|
||||
@diff_blueprint.app_template_filter('diff_unescape_difference_spans')
|
||||
def diff_unescape_difference_spans(content):
|
||||
"""Emulate Jinja2's auto-escape, then selectively unescape our diff spans."""
|
||||
from markupsafe import escape
|
||||
|
||||
if not content:
|
||||
return Markup('')
|
||||
|
||||
# Step 1: Escape everything like Jinja2 would (this makes it XSS-safe)
|
||||
escaped_content = escape(str(content))
|
||||
|
||||
# Step 2: Unescape only our exact diff spans generated by apply_html_color_to_body()
|
||||
# Pattern matches the exact structure:
|
||||
# <span style="{STYLE}" role="{ROLE}" aria-label="{LABEL}" title="{TITLE}">
|
||||
|
||||
# Unescape outer span opening tags with full attributes (role, aria-label, title)
|
||||
# Matches removed/added/changed/changed_into spans
|
||||
result = re.sub(
|
||||
rf'<span style="({re.escape(REMOVED_STYLE)}|{re.escape(ADDED_STYLE)})" '
|
||||
rf'role="(deletion|insertion|note)" '
|
||||
rf'aria-label="([^&]+?)" '
|
||||
rf'title="([^&]+?)">',
|
||||
r'<span style="\1" role="\2" aria-label="\3" title="\4">',
|
||||
str(escaped_content),
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Unescape inner span opening tags (without additional attributes)
|
||||
# This matches the darker background styles for changed parts within lines
|
||||
result = re.sub(
|
||||
rf'<span style="({re.escape(REMOVED_INNER_STYLE)}|{re.escape(ADDED_INNER_STYLE)})">',
|
||||
r'<span style="\1">',
|
||||
result,
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Unescape closing tags (but only as many as we opened)
|
||||
open_count = result.count('<span style=')
|
||||
close_count = str(escaped_content).count('</span>')
|
||||
|
||||
# Replace up to the number of spans we opened
|
||||
for _ in range(min(open_count, close_count)):
|
||||
result = result.replace('</span>', '</span>', 1)
|
||||
|
||||
return Markup(result)
|
||||
|
||||
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page(uuid):
|
||||
"""
|
||||
Render the history/diff page for a watch.
|
||||
|
||||
This route is processor-aware: it delegates rendering to the processor's
|
||||
difference.py module, allowing different processor types to provide
|
||||
custom visualizations:
|
||||
- text_json_diff: Text/HTML diff with syntax highlighting
|
||||
- restock_diff: Could show price charts and stock history
|
||||
- image_diff: Could show image comparison slider/overlay
|
||||
|
||||
Each processor implements processors/{type}/difference.py::render()
|
||||
If a processor doesn't have a difference module, falls back to text_json_diff.
|
||||
"""
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
dates = list(watch.history.keys())
|
||||
if not dates or len(dates) < 2:
|
||||
flash("Not enough history (2 snapshots required) to show difference page for this watch.", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have difference module, use text_json_diff as default
|
||||
from changedetectionio.processors.text_json_diff.difference import render as default_render
|
||||
return default_render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_GET(uuid):
|
||||
"""
|
||||
Render the data extraction form for a watch.
|
||||
|
||||
This route is processor-aware: it delegates to the processor's
|
||||
extract.py module, allowing different processor types to provide
|
||||
custom extraction interfaces.
|
||||
|
||||
Each processor implements processors/{type}/extract.py::render_form()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
|
||||
# Call the processor's render_form() function
|
||||
if hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import render_form as default_render_form
|
||||
return default_render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_POST(uuid):
|
||||
"""
|
||||
Process the data extraction request.
|
||||
|
||||
This route is processor-aware: it delegates to the processor's
|
||||
extract.py module, allowing different processor types to provide
|
||||
custom extraction logic.
|
||||
|
||||
Each processor implements processors/{type}/extract.py::process_extraction()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
|
||||
# Call the processor's process_extraction() function
|
||||
if hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import process_extraction as default_process_extraction
|
||||
return default_process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
Serve processor-specific binary assets (images, files, etc.).
|
||||
|
||||
This route is processor-aware: it delegates to the processor's
|
||||
difference.py module, allowing different processor types to serve
|
||||
custom assets without embedding them as base64 in templates.
|
||||
|
||||
This solves memory issues with large binary data (e.g., screenshots)
|
||||
by streaming them as separate HTTP responses instead of embedding
|
||||
in the HTML template.
|
||||
|
||||
Each processor implements processors/{type}/difference.py::get_asset()
|
||||
which returns (binary_data, content_type, cache_control_header).
|
||||
|
||||
Example URLs:
|
||||
- /diff/{uuid}/processor-asset/before
|
||||
- /diff/{uuid}/processor-asset/after
|
||||
- /diff/{uuid}/processor-asset/rendered_diff
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
|
||||
return diff_blueprint
|
||||
@@ -1,8 +1,7 @@
|
||||
import time
|
||||
from copy import deepcopy
|
||||
import os
|
||||
import importlib.resources
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, abort
|
||||
from loguru import logger
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
@@ -96,6 +95,26 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
form.datastore = datastore
|
||||
form.watch = default
|
||||
|
||||
# Load processor-specific config from JSON file for GET requests
|
||||
if request.method == 'GET' and processor_name:
|
||||
try:
|
||||
from changedetectionio.processors.base import difference_detection_processor
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||
|
||||
if processor_config:
|
||||
# Populate processor-config-* fields from JSON
|
||||
for config_key, config_value in processor_config.items():
|
||||
field_name = f'processor_config_{config_key}'
|
||||
if hasattr(form, field_name):
|
||||
getattr(form, field_name).data = config_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load processor config: {e}")
|
||||
|
||||
for p in datastore.extra_browsers:
|
||||
form.fetch_backend.choices.append(p)
|
||||
|
||||
@@ -114,11 +133,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
if request.method == 'POST' and form.validate():
|
||||
|
||||
# If they changed processor, it makes sense to reset it.
|
||||
if datastore.data['watching'][uuid].get('processor') != form.data.get('processor'):
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
flash("Reset watch history due to change of processor")
|
||||
|
||||
extra_update_obj = {
|
||||
'consecutive_filter_failures': 0,
|
||||
'last_error' : False
|
||||
@@ -129,7 +143,60 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||
|
||||
# Ignore text
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
processor_config_data = {}
|
||||
fields_to_remove = []
|
||||
for field_name, field_value in form.data.items():
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
if field_value: # Only save non-empty values
|
||||
processor_config_data[config_key] = field_value
|
||||
fields_to_remove.append(field_name)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = form.data.get('processor')
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = processors.difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
# Try to import the edit_hook module from the processor package
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
|
||||
watch_obj = datastore.data['watching'][uuid]
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
|
||||
# Remove processor-config-* fields from form.data before updating datastore
|
||||
for field_name in fields_to_remove:
|
||||
form.data.pop(field_name, None)
|
||||
|
||||
# Ignore text
|
||||
form_ignore_text = form.ignore_text.data
|
||||
datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text
|
||||
|
||||
@@ -206,7 +273,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid))
|
||||
return redirect(url_for('ui.ui_diff.diff_history_page', uuid=uuid))
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=request.args.get("tag",'')))
|
||||
|
||||
@@ -223,26 +290,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
|
||||
# if system or watch is configured to need a chrome type browser
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
watch_needs_selenium_or_playwright = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
watch_needs_selenium_or_playwright = True
|
||||
|
||||
|
||||
from zoneinfo import available_timezones
|
||||
|
||||
# Only works reliably with Playwright
|
||||
|
||||
# Import the global plugin system
|
||||
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
|
||||
|
||||
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras, get_fetcher_capabilities
|
||||
|
||||
# Get fetcher capabilities instead of hardcoded logic
|
||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
||||
|
||||
c = [f"processor-{watch.get('processor')}"]
|
||||
if worker_handler.is_watch_running(uuid):
|
||||
c.append('checking-now')
|
||||
|
||||
template_args = {
|
||||
'available_processors': processors.available_processors(),
|
||||
'available_timezones': sorted(available_timezones()),
|
||||
'browser_steps_config': browser_step_ui_config,
|
||||
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
|
||||
'extra_classes': ' '.join(c),
|
||||
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
|
||||
'extra_processor_config': form.extra_tab_content(),
|
||||
'extra_title': f" - Edit - {watch.label}",
|
||||
@@ -252,6 +318,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
|
||||
'jq_support': jq_support,
|
||||
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||
'app_rss_token': app_rss_token,
|
||||
'rss_uuid_feed' : {
|
||||
'label': watch.label,
|
||||
'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token)
|
||||
},
|
||||
'settings_application': datastore.data['settings']['application'],
|
||||
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
|
||||
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
|
||||
@@ -261,7 +332,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch,
|
||||
'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright,
|
||||
'capabilities': capabilities
|
||||
}
|
||||
|
||||
included_content = None
|
||||
@@ -335,6 +406,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
s = re.sub(r'[0-9]+', r'\\d+', s)
|
||||
datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')
|
||||
|
||||
return f"<a href={url_for('ui.ui_views.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||
return f"<a href={url_for('ui.ui_preview.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||
|
||||
return edit_blueprint
|
||||
@@ -2,7 +2,6 @@ from flask import Blueprint, request, make_response
|
||||
import random
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -15,7 +14,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@notification_blueprint.route("/notification/send-test/", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def ajax_callback_send_notification_test(watch_uuid=None):
|
||||
|
||||
from changedetectionio.notification_service import NotificationContextData, set_basic_notification_vars
|
||||
# Watch_uuid could be unset in the case it`s used in tag editor, global settings
|
||||
import apprise
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
@@ -39,11 +38,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)
|
||||
|
||||
watch = datastore.data['watching'].get(watch_uuid)
|
||||
|
||||
notification_urls = None
|
||||
|
||||
if request.form.get('notification_urls'):
|
||||
notification_urls = request.form['notification_urls'].strip().splitlines()
|
||||
notification_urls = request.form.get('notification_urls','').strip().splitlines()
|
||||
|
||||
if not notification_urls:
|
||||
logger.debug("Test notification - Trying by group/tag in the edit form if available")
|
||||
@@ -81,6 +76,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Only use if present, if not set in n_object it should use the default system value
|
||||
if 'notification_format' in request.form and request.form['notification_format'].strip():
|
||||
n_object['notification_format'] = request.form.get('notification_format', '').strip()
|
||||
else:
|
||||
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
|
||||
|
||||
if 'notification_title' in request.form and request.form['notification_title'].strip():
|
||||
n_object['notification_title'] = request.form.get('notification_title', '').strip()
|
||||
@@ -97,7 +94,27 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
n_object['notification_body'] = "Test body"
|
||||
|
||||
n_object['as_async'] = False
|
||||
n_object.update(watch.extra_notification_token_values())
|
||||
|
||||
# Same like in notification service, should be refactored
|
||||
dates = list(watch.history.keys())
|
||||
trigger_text = ''
|
||||
snapshot_contents = ''
|
||||
|
||||
# Could be called as a 'test notification' with only 1 snapshot available
|
||||
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
|
||||
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
|
||||
|
||||
if len(dates) > 1:
|
||||
prev_snapshot = watch.get_history_snapshot(timestamp=dates[-2])
|
||||
current_snapshot = watch.get_history_snapshot(timestamp=dates[-1])
|
||||
|
||||
n_object.update(set_basic_notification_vars(current_snapshot=current_snapshot,
|
||||
prev_snapshot=prev_snapshot,
|
||||
watch=watch,
|
||||
triggered_text=trigger_text,
|
||||
timestamp_changed=dates[-1] if dates else None))
|
||||
|
||||
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
200
changedetectionio/blueprint/ui/preview.py
Normal file
200
changedetectionio/blueprint/ui/preview.py
Normal file
@@ -0,0 +1,200 @@
|
||||
from flask import Blueprint, request, url_for, flash, render_template, redirect
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import html_tools
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
"""
|
||||
Render the preview page for a watch.
|
||||
|
||||
This route is processor-aware: it delegates rendering to the processor's
|
||||
preview.py module, allowing different processor types to provide
|
||||
custom visualizations:
|
||||
- text_json_diff: Text preview with syntax highlighting
|
||||
- image_ssim_diff: Image preview with proper rendering
|
||||
- restock_diff: Could show latest price/stock data
|
||||
|
||||
Each processor implements processors/{type}/preview.py::render()
|
||||
If a processor doesn't have a preview module, falls back to default text preview.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have preview module, use default text preview
|
||||
content = []
|
||||
versions = []
|
||||
timestamp = None
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
triggered_line_numbers = []
|
||||
ignored_line_numbers = []
|
||||
blocked_line_numbers = []
|
||||
|
||||
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
|
||||
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.args.get('version')
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
timestamp = preferred_version
|
||||
|
||||
try:
|
||||
versions = list(watch.history.keys())
|
||||
content = watch.get_history_snapshot(timestamp=timestamp)
|
||||
|
||||
triggered_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||
wordlist=watch.get('trigger_text'),
|
||||
mode='line numbers'
|
||||
)
|
||||
ignored_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||
wordlist=watch.get('ignore_text'),
|
||||
mode='line numbers'
|
||||
)
|
||||
blocked_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||
wordlist=watch.get("text_should_not_be_present"),
|
||||
mode='line numbers'
|
||||
)
|
||||
except Exception as e:
|
||||
content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
|
||||
|
||||
from changedetectionio.pluggy_interface import get_fetcher_capabilities
|
||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||
|
||||
output = render_template("preview.html",
|
||||
capabilities=capabilities,
|
||||
content=content,
|
||||
current_diff_url=watch['url'],
|
||||
current_version=timestamp,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - Diff - {watch.label} @ {timestamp}",
|
||||
highlight_ignored_line_numbers=ignored_line_numbers,
|
||||
highlight_triggered_line_numbers=triggered_line_numbers,
|
||||
highlight_blocked_line_numbers=blocked_line_numbers,
|
||||
history_n=watch.history_n,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
screenshot=watch.get_screenshot(),
|
||||
uuid=uuid,
|
||||
versions=versions,
|
||||
watch=watch,
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
Serve processor-specific binary assets for preview (images, files, etc.).
|
||||
|
||||
This route is processor-aware: it delegates to the processor's
|
||||
preview.py module, allowing different processor types to serve
|
||||
custom assets without embedding them as base64 in templates.
|
||||
|
||||
This solves memory issues with large binary data by streaming them
|
||||
as separate HTTP responses instead of embedding in the HTML template.
|
||||
|
||||
Each processor implements processors/{type}/preview.py::get_asset()
|
||||
which returns (binary_data, content_type, cache_control_header).
|
||||
|
||||
Example URLs:
|
||||
- /preview/{uuid}/processor-asset/screenshot?version=123456789
|
||||
"""
|
||||
from flask import make_response
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
|
||||
return preview_blueprint
|
||||
@@ -9,13 +9,12 @@
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
This will remove version history (snapshots) for ALL watches, but keep
|
||||
your list of URLs! <br />
|
||||
You may like to use the <strong>BACKUP</strong> link first.<br />
|
||||
{{ _('This will remove version history (snapshots) for ALL watches, but keep your list of URLs!') }} <br />
|
||||
{{ _('You may like to use the') }} <strong>{{ _('BACKUP') }}</strong> {{ _('link first.') }}<br />
|
||||
</div>
|
||||
<br />
|
||||
<div class="pure-control-group">
|
||||
<label for="confirmtext">Confirmation text</label>
|
||||
<label for="confirmtext">{{ _('Confirmation text') }}</label>
|
||||
<input
|
||||
type="text"
|
||||
id="confirmtext"
|
||||
@@ -25,20 +24,19 @@
|
||||
size="10"
|
||||
/>
|
||||
<span class="pure-form-message-inline"
|
||||
>Type in the word <strong>clear</strong> to confirm that you
|
||||
understand.</span
|
||||
>{{ _('Type in the word') }} <strong>{{ _('clear') }}</strong> {{ _('to confirm that you understand.') }}</span
|
||||
>
|
||||
</div>
|
||||
<br />
|
||||
<div class="pure-control-group">
|
||||
<button type="submit" class="pure-button pure-button-primary">
|
||||
Clear History!
|
||||
{{ _('Clear History!') }}
|
||||
</button>
|
||||
</div>
|
||||
<br />
|
||||
<div class="pure-control-group">
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel"
|
||||
>Cancel</a
|
||||
>{{ _('Cancel') }}</a
|
||||
>
|
||||
</div>
|
||||
</fieldset>
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
<ul id="highlightSnippetActions">
|
||||
<li>
|
||||
<button class="pure-button pure-button-primary" onclick="diffToJpeg()" title="{{ _('Share diff as image') }}">{{ _('Share as Image') }}</button>
|
||||
</li>
|
||||
<li>
|
||||
<a class="pure-button pure-button-primary" data-mode="exact" href="javascript:void(0);">{{ _('Ignore any lines matching') }}</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="pure-button pure-button-primary" data-mode="digit-regex" href="javascript:void(0);" >{{ _('Ignore any lines matching excluding digits') }}</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
166
changedetectionio/blueprint/ui/templates/diff.html
Normal file
166
changedetectionio/blueprint/ui/templates/diff.html
Normal file
@@ -0,0 +1,166 @@
|
||||
{% extends 'base.html' %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
{% block content %}
|
||||
<script>
|
||||
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||
{% if last_error_screenshot %}
|
||||
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
||||
{% endif %}
|
||||
|
||||
const highlight_submit_ignore_url="{{url_for('ui.ui_edit.highlight_submit_ignore_url', uuid=uuid)}}";
|
||||
const watch_url= {{watch_a.link|tojson}};
|
||||
|
||||
// Initial scroll position: if set, scroll to this line number in #difference on page load
|
||||
const initialScrollToLineNumber = {{ initial_scroll_line_number|default('null') }};
|
||||
</script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/html2canvas@1.4.1/dist/html2canvas.min.js"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/piexifjs@1.0.6/piexif.min.js"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='snippet-to-image.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
||||
|
||||
|
||||
<div id="settings">
|
||||
<form class="pure-form " action="{{ url_for("ui.ui_diff.diff_history_page", uuid=uuid) }}" method="GET" id="diff-form">
|
||||
<fieldset class="diff-fieldset">
|
||||
{% if versions|length >= 1 %}
|
||||
<span style="white-space: nowrap;">
|
||||
<label id="change-from" for="diff-from-version" class="from-to-label">{{ _('From') }}</label>
|
||||
<select id="diff-from-version" name="from_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
|
||||
{{ version }}{#{% if loop.index == 2 %} (Previous){% endif %}#}
|
||||
</option>
|
||||
{%- endfor -%}
|
||||
</select>
|
||||
</span>
|
||||
<span style="white-space: nowrap;">
|
||||
<label id="change-to" for="diff-to-version" class="from-to-label">{{ _('To') }}</label>
|
||||
<select id="diff-to-version" name="to_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
|
||||
{{ version }}{#{% if loop.first %} (Current){% endif %}#}
|
||||
</option>
|
||||
{%- endfor -%}
|
||||
</select>
|
||||
</span>
|
||||
{#<button type="submit" class="pure-button pure-button-primary reset-margin">Go</button>#}
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
<fieldset id="diff-style">
|
||||
<span>
|
||||
<label for="diffWords" class="pure-checkbox">
|
||||
<input type="radio" name="type" id="diffWords" value="diffWords" {% if diff_prefs.type == 'diffWords' %}checked=""{% endif %}> {{ _('Words') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="diffLines" class="pure-checkbox">
|
||||
<input type="radio" name="type" id="diffLines" value="diffLines" {% if diff_prefs.type == 'diffLines' %}checked=""{% endif %}> {{ _('Lines') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="ignoreWhitespace" class="pure-checkbox" id="label-diff-ignorewhitespace">
|
||||
<input type="checkbox" id="ignoreWhitespace" name="ignoreWhitespace" {% if diff_prefs.ignoreWhitespace %}checked=""{% endif %}> {{ _('Ignore Whitespace') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="changesOnly" class="pure-checkbox" id="label-diff-changes">
|
||||
<input type="checkbox" id="changesOnly" name="changesOnly" {% if diff_prefs.changesOnly %}checked=""{% endif %}> {{ _('Same/non-changed') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="removed" class="pure-checkbox" id="label-diff-removed">
|
||||
<input type="checkbox" id="removed" name="removed" {% if diff_prefs.removed %}checked=""{% endif %}> {{ _('Removed') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="added" class="pure-checkbox" id="label-diff-added">
|
||||
<input type="checkbox" id="added" name="added" {% if diff_prefs.added %}checked=""{% endif %}> {{ _('Added') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="replaced" class="pure-checkbox" id="label-diff-replaced">
|
||||
<input type="checkbox" id="replaced" name="replaced" {% if diff_prefs.replaced %}checked=""{% endif %}> {{ _('Replaced') }}</label>
|
||||
</span>
|
||||
</fieldset>
|
||||
{%- if versions|length >= 2 -%}
|
||||
<div id="keyboard-nav">
|
||||
<strong>{{ _('Keyboard:') }} </strong>
|
||||
<a href="" class="pure-button pure-button-primary" id="btn-previous"> ← {{ _('Previous') }}</a>
|
||||
<a class="pure-button pure-button-primary" id="btn-next" href=""> → {{ _('Next') }}</a>
|
||||
</div>
|
||||
{%- endif -%}
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div id="diff-jump">
|
||||
<a id="jump-next-diff" title="{{ _('Jump to next difference') }}">{{ _('Jump') }}</a>
|
||||
</div>
|
||||
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<div class="tabs">
|
||||
<ul>
|
||||
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">{{ _('Error Text') }}</a></li> {% endif %}
|
||||
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">{{ _('Error Screenshot') }}</a></li> {% endif %}
|
||||
<li class="tab" id="text-tab"><a href="#text">{{ _('Text') }}</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="#screenshot">{{ _('Current screenshot') }}</a></li>
|
||||
<li class="tab" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">{{ _('Extract Data') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div id="diff-ui">
|
||||
<div class="tab-pane-inner" id="error-text">
|
||||
<div class="snapshot-age error">{{watch_a.error_text_ctime|format_seconds_ago}} {{ _('seconds ago.') }}</div>
|
||||
<pre>
|
||||
{{ last_error_text }}
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="error-screenshot">
|
||||
<div class="snapshot-age error">{{watch_a.snapshot_error_screenshot_ctime|format_seconds_ago}} {{ _('seconds ago') }}</div>
|
||||
<img id="error-screenshot-img" style="max-width: 80%" alt="{{ _('Current error-ing screenshot from most recent request') }}" >
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="text">
|
||||
{%- if (content | default('')).split('\n') | length > 100 -%}
|
||||
<div id="cell-diff-jump-visualiser" style="user-select: none;">
|
||||
{%- for cell in diff_cell_grid -%}
|
||||
<div{% if cell.class %} class="{{ cell.class }}"{% endif %}></div>
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
{%- endif -%}
|
||||
{%- if password_enabled_and_share_is_off -%}
|
||||
<div class="tip">{{ _('Pro-tip: You can enable') }} <strong>{{ _('"share access when password is enabled"') }}</strong> {{ _('from settings.') }}
|
||||
</div>
|
||||
{%- endif -%}
|
||||
<div id="text-diff-heading-area" style="user-select: none;">
|
||||
<div class="snapshot-age"><span>{{ from_version|format_timestamp_timeago }}</span>
|
||||
{%- if note -%}<span class="note"><strong>{{ note }}</strong></span>{%- endif -%}
|
||||
<a href="{{ url_for("ui.ui_preview.preview_page", uuid=uuid) }}">{{ _('Goto single snapshot') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
<pre id="difference" style="border-left: 2px solid #ddd;">{{ content| diff_unescape_difference_spans }}</pre>
|
||||
<div id="diff-visualiser-area-after" style="user-select: none;">
|
||||
<strong>{{ _('Tip:') }}</strong> {{ _('Highlight text to share or add to ignore lists.') }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="screenshot">
|
||||
<div class="tip">
|
||||
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
|
||||
</div>
|
||||
{% if is_html_webdriver %}
|
||||
{% if screenshot %}
|
||||
<div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}" >
|
||||
{% else %}
|
||||
{{ _('No screenshot available just yet! Try rechecking the page.') }}
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<strong>{{ _('Screenshot requires Playwright/WebDriver enabled') }}</strong>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<script>
|
||||
const newest_version_timestamp = {{newest_version_timestamp}};
|
||||
</script>
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
|
||||
|
||||
|
||||
{% endblock %}
|
||||
@@ -1,12 +1,13 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='scheduler.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='conditions.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
|
||||
|
||||
<script>
|
||||
@@ -43,20 +44,20 @@
|
||||
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#request">Request</a></li>
|
||||
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
||||
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
||||
<!-- should goto extra forms? -->
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">Conditions</a></li>
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
<li class="tab"><a href="#stats">Stats</a></li>
|
||||
<li class="tab"><a href="#notifications">{{ _('Notifications') }}</a></li>
|
||||
<li class="tab"><a href="#stats">{{ _('Stats') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -69,19 +70,19 @@
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
|
||||
<div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
|
||||
<div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
|
||||
<div class="pure-form-message">{{ _('Some sites use JavaScript to create the content, for this you should') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">{{ _('use the Chrome/WebDriver Fetcher') }}</a></div>
|
||||
<div class="pure-form-message">{{ _('Variables are supported in the URL') }} (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">{{ _('help and examples here') }}</a>).</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
|
||||
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, class="m-d", placeholder=watch.label) }}
|
||||
<span class="pure-form-message-inline">Automatically uses the page title if found, you can also use your own title/description here</span>
|
||||
<span class="pure-form-message-inline">{{ _('Automatically uses the page title if found, you can also use your own title/description here') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group time-between-check border-fieldset">
|
||||
|
||||
@@ -91,7 +92,7 @@
|
||||
{{ render_field(form.time_between_check, class="time-check-widget") }}
|
||||
|
||||
<span class="pure-form-message-inline">
|
||||
The interval/amount of time between each check.
|
||||
{{ _('The interval/amount of time between each check.') }}
|
||||
</span>
|
||||
</div>
|
||||
<div id="time-between-check-schedule">
|
||||
@@ -106,7 +107,7 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.filter_failure_notification_send) }}
|
||||
<span class="pure-form-message-inline">
|
||||
Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
|
||||
{{ _('Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
@@ -119,17 +120,17 @@
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched site doesn\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.') }} </p>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
</span>
|
||||
</div>
|
||||
{% if form.proxy %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
<div>{{ form.proxy.label }} <a href="" id="check-all-proxies" class="pure-button button-secondary button-xsmall" >Check/Scan all</a></div>
|
||||
<div>{{ form.proxy.label }} <a href="" id="check-all-proxies" class="pure-button button-secondary button-xsmall" >{{ _('Check/Scan all') }}</a></div>
|
||||
<div>{{ form.proxy(class="fetch-backend-proxy") }}</div>
|
||||
<span class="pure-form-message-inline">
|
||||
Choose a proxy for this watch
|
||||
{{ _('Choose a proxy for this watch') }}
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
@@ -139,31 +140,29 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.webdriver_delay) }}
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
|
||||
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
|
||||
<br>
|
||||
This will wait <i>n</i> seconds before extracting the text.
|
||||
{{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
|
||||
{% if using_global_webdriver_wait %}
|
||||
<br><strong>Using the current global default settings</strong>
|
||||
<br><strong>{{ _('Using the current global default settings') }}</strong>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">Show advanced options</a>
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a>
|
||||
</div>
|
||||
<div class="advanced-options" style="display: none;">
|
||||
{{ render_field(form.webdriver_js_execute_code) }}
|
||||
<div class="pure-form-message-inline">
|
||||
Run this code before performing change detection, handy for filling in fields and other
|
||||
actions <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/Run-JavaScript-before-change-detection">More
|
||||
help and examples here</a>
|
||||
{{ _('Run this code before performing change detection, handy for filling in fields and other actions') }} <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/Run-JavaScript-before-change-detection">{{ _('More help and examples here') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
<!-- html requests always -->
|
||||
<fieldset data-visible-for="fetch_backend=html_requests">
|
||||
<div class="pure-control-group">
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">Show advanced options</a>
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a>
|
||||
</div>
|
||||
<div class="advanced-options" style="display: none;">
|
||||
<div class="pure-control-group" id="request-method">
|
||||
@@ -178,7 +177,7 @@
|
||||
\"year\":{% now 'Europe/Berlin', '%Y' %}
|
||||
}") }}
|
||||
</div>
|
||||
<div class="pure-form-message">Variables are supported in the request body (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
|
||||
<div class="pure-form-message">{{ _('Variables are supported in the request body') }} (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">{{ _('help and examples here') }}</a>).</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
<!-- hmm -->
|
||||
@@ -187,15 +186,15 @@
|
||||
Cookie: foobar
|
||||
User-Agent: wonderbra 1.0
|
||||
Math: {{ 1 + 1 }}") }}
|
||||
<div class="pure-form-message">Variables are supported in the request header values (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
|
||||
<div class="pure-form-message">{{ _('Variables are supported in the request header values') }} (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">{{ _('help and examples here') }}</a>).</div>
|
||||
<div class="pure-form-message-inline">
|
||||
{% if has_extra_headers_file %}
|
||||
<strong>Alert! Extra headers file found and will be added to this watch!</strong>
|
||||
<strong>{{ _('Alert! Extra headers file found and will be added to this watch!') }}</strong>
|
||||
{% else %}
|
||||
Headers can be also read from a file in your data-directory <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file">Read more here</a>
|
||||
{{ _('Headers can be also read from a file in your data-directory') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file">{{ _('Read more here') }}</a>
|
||||
{% endif %}
|
||||
<br>
|
||||
(Not supported by Selenium browser)
|
||||
({{ _('Not supported by Selenium browser') }})
|
||||
</div>
|
||||
</div>
|
||||
<fieldset data-visible-for="fetch_backend=html_requests fetch_backend=html_webdriver" >
|
||||
@@ -206,9 +205,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="browser-steps">
|
||||
{% if watch_needs_selenium_or_playwright %}
|
||||
{# Only works with playwright #}
|
||||
{% if system_has_playwright_configured %}
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
{% if visual_selector_data_ready %}
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
@@ -220,19 +218,19 @@ Math: {{ 1 + 1 }}") }}
|
||||
|
||||
<!--- Do this later -->
|
||||
<div class="checkbox" style="display: none;">
|
||||
<input type=checkbox id="include_text_elements" > <label for="include_text_elements">Turn on text finder</label>
|
||||
<input type=checkbox id="include_text_elements" > <label for="include_text_elements">{{ _('Turn on text finder') }}</label>
|
||||
</div>
|
||||
|
||||
<div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div>
|
||||
<div id="loading-status-text" style="display: none;">{{ _('Please wait, first browser step can take a little time to load..') }}<div class="spinner"></div></div>
|
||||
<div class="flex-wrapper" >
|
||||
|
||||
<div id="browser-steps-ui" class="noselect">
|
||||
<div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
|
||||
<span class="loader" >
|
||||
<span id="browsersteps-click-start">
|
||||
<h2 >Click here to Start</h2>
|
||||
<h2 >{{ _('Click here to Start') }}</h2>
|
||||
<svg style="height: 3.5rem;" version="1.1" viewBox="0 0 32 32" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g id="start"/><g id="play_x5F_alt"><path d="M16,0C7.164,0,0,7.164,0,16s7.164,16,16,16s16-7.164,16-16S24.836,0,16,0z M10,24V8l16.008,8L10,24z" style="fill: var(--color-grey-400);"/></g></svg><br>
|
||||
Please allow 10-15 seconds for the browser to connect.<br>
|
||||
{{ _('Please allow 10-15 seconds for the browser to connect.') }}<br>
|
||||
</span>
|
||||
<div class="spinner" style="display: none;"></div>
|
||||
</span>
|
||||
@@ -241,22 +239,20 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</div>
|
||||
<div id="browser-steps-fieldlist" >
|
||||
<span id="browser-seconds-remaining">Press "Play" to start.</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
<span id="browser-seconds-remaining">{{ _('Press "Play" to start.') }}</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
{{ render_field(form.browser_steps) }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
{% else %}
|
||||
{# it's configured to use selenium or chrome but system says its not configured #}
|
||||
{{ playwright_warning() }}
|
||||
{% if system_has_webdriver_configured %}
|
||||
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
|
||||
{% endif %}
|
||||
<strong>{{ _('Visual Selector data is not ready, watch needs to be checked atleast once.') }}</strong>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{# "This functionality needs chrome.." #}
|
||||
{{ only_playwright_type_watches_warning() }}
|
||||
<p>
|
||||
<strong>{{ _('Sorry, this functionality only works with fetchers that support interactive Javascript (so far only Playwright based fetchers)') }}<br>
|
||||
{{ _('You need to') }} <a href="#request">{{ _('Set the fetch method') }}</a> {{ _('to one that supports interactive Javascript.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
@@ -266,28 +262,28 @@ Math: {{ 1 + 1 }}") }}
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_ternary_field(form.notification_muted, BooleanField=true) }}
|
||||
</div>
|
||||
{% if watch_needs_selenium_or_playwright %}
|
||||
{% if capabilities.supports_screenshots %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_screenshot) }}
|
||||
<span class="pure-form-message-inline">
|
||||
<strong>Use with caution!</strong> This will easily fill up your email storage quota or flood other storages.
|
||||
<strong>{{ _('Use with caution!') }}</strong> {{ _('This will easily fill up your email storage quota or flood other storages.') }}
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="field-group" id="notification-field-group">
|
||||
{% if has_default_notification_urls %}
|
||||
<div class="inline-warning">
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="Look out!" title="Lookout!" >
|
||||
There are <a href="{{ url_for('settings.settings_page')}}#notifications">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications.
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Look out!') }}" title="{{ _('Lookout!') }}" >
|
||||
{{ _('There are') }} <a href="{{ url_for('settings.settings_page')}}#notifications">{{ _('system-wide notification URLs enabled') }}</a>, {{ _('this form will override notification settings for this watch only') }} ‐ {{ _('an empty Notification URL list here will still send notifications.') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">Use system defaults</a>
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">{{ _('Use system defaults') }}</a>
|
||||
{{ render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
|
||||
<div class="tab-pane-inner" id="conditions">
|
||||
<script>
|
||||
@@ -298,23 +294,23 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ render_conditions_fieldlist_of_formfields_as_table(form.conditions) }}
|
||||
<div class="pure-form-message-inline">
|
||||
|
||||
<p id="verify-state-text">Use the verify (✓) button to test if a condition passes against the current snapshot.</p>
|
||||
Read a quick tutorial about <a href="https://changedetection.io/tutorial/conditional-actions-web-page-changes">using conditional web page changes here</a>.<br>
|
||||
<p id="verify-state-text">{{ _('Use the verify (✓) button to test if a condition passes against the current snapshot.') }}</p>
|
||||
{{ _('Read a quick tutorial about') }} <a href="https://changedetection.io/tutorial/conditional-actions-web-page-changes">{{ _('using conditional web page changes here') }}</a>.<br>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">Activate preview</span>
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||
<div>
|
||||
<div id="edit-text-filter">
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<strong>Pro-tips:</strong><br>
|
||||
<strong>{{ _('Pro-tips:') }}</strong><br>
|
||||
<ul>
|
||||
<li>
|
||||
Use the preview page to see your filters and triggers highlighted.
|
||||
{{ _('Use the preview page to see your filters and triggers highlighted.') }}
|
||||
</li>
|
||||
<li>
|
||||
Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a>
|
||||
{{ _('Some sites use JavaScript to create the content, for this you should') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">{{ _('use the Chrome/WebDriver Fetcher') }}</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
@@ -322,50 +318,51 @@ Math: {{ 1 + 1 }}") }}
|
||||
{% include "edit/include_subtract.html" %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||
<h3>Text filtering</h3>
|
||||
Limit trigger/ignore/block/extract to;<br>
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
{{ _('Limit trigger/ignore/block/extract to;') }}<br>
|
||||
{{ render_checkbox_field(form.filter_text_added) }}
|
||||
{{ render_checkbox_field(form.filter_text_replaced) }}
|
||||
{{ render_checkbox_field(form.filter_text_removed) }}
|
||||
<span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br>
|
||||
<span class="pure-form-message-inline"> So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
|
||||
<span class="pure-form-message-inline"> When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
|
||||
<span class="pure-form-message-inline">{{ _('Note: Depending on the length and similarity of the text on each line, the algorithm may consider an') }} <strong>{{ _('addition') }}</strong> {{ _('instead of') }} <strong>{{ _('replacement') }}</strong> {{ _('for example.') }}</span><br>
|
||||
<span class="pure-form-message-inline"> {{ _('So it\'s always better to select') }} <strong>{{ _('Added') }}</strong>+<strong>{{ _('Replaced') }}</strong> {{ _('when you\'re interested in new content.') }}</span><br>
|
||||
<span class="pure-form-message-inline"> {{ _('When content is merely moved in a list, it will also trigger an') }} <strong>{{ _('addition') }}</strong>, {{ _('consider enabling') }} <code><strong>{{ _('Only trigger when unique lines appear') }}</strong></code></span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.check_unique_lines) }}
|
||||
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.') }}</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.remove_duplicate_lines) }}
|
||||
<span class="pure-form-message-inline">Remove duplicate lines of text</span>
|
||||
<span class="pure-form-message-inline">{{ _('Remove duplicate lines of text') }}</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.sort_text_alphabetically) }}
|
||||
<span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with') }} <i>{{ _('check unique lines') }}</i> {{ _('below.') }}</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.trim_text_whitespace) }}
|
||||
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
|
||||
<span class="pure-form-message-inline">{{ _('Remove any whitespace before and after each line of text') }}</span>
|
||||
</fieldset>
|
||||
{% include "edit/text-options.html" %}
|
||||
</div>
|
||||
</div>
|
||||
<div id="text-preview" style="display: none;" >
|
||||
<script>
|
||||
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
|
||||
</script>
|
||||
<br>
|
||||
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
|
||||
<div class="minitabs-wrapper">
|
||||
<div class="minitabs-content">
|
||||
<div id="text-preview-inner" class="monospace-preview">
|
||||
<p>Loading...</p>
|
||||
</div>
|
||||
<div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
|
||||
<p>Loading...</p>
|
||||
</div>
|
||||
<script>
|
||||
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
|
||||
</script>
|
||||
<br>
|
||||
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
|
||||
<div class="minitabs-wrapper">
|
||||
<div class="minitabs-content">
|
||||
<div id="text-preview-inner" class="monospace-preview">
|
||||
<p>{{ _('Loading...') }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
|
||||
<p>{{ _('Loading...') }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ highlight_trigger_ignored_explainer() }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -377,41 +374,55 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ extra_form_content|safe }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{% if watch_needs_selenium_or_playwright %}
|
||||
{% if system_has_playwright_configured %}
|
||||
<span class="pure-form-message-inline" id="visual-selector-heading">
|
||||
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
|
||||
</span>
|
||||
{% if capabilities.supports_screenshots and capabilities.supports_xpath_element_data %}
|
||||
{% if visual_selector_data_ready %}
|
||||
<span class="pure-form-message-inline" id="visual-selector-heading">
|
||||
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
||||
</span>
|
||||
|
||||
<div id="selector-header">
|
||||
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
|
||||
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
|
||||
<i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
|
||||
</div>
|
||||
<div id="selector-wrapper" style="display: none">
|
||||
<!-- request the screenshot and get the element offset info ready -->
|
||||
<!-- use img src ready load to know everything is ready to map out -->
|
||||
<!-- @todo: maybe something interesting like a field to select 'elements that contain text... and their parents n' -->
|
||||
<img id="selector-background" >
|
||||
<canvas id="selector-canvas"></canvas>
|
||||
</div>
|
||||
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div>
|
||||
{% else %}
|
||||
{# The watch needed chrome but system says that playwright is not ready #}
|
||||
{{ playwright_warning() }}
|
||||
{% endif %}
|
||||
{% if system_has_webdriver_configured %}
|
||||
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'image_ssim_diff' %}
|
||||
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
||||
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
||||
<label style="margin-right: 15px;">
|
||||
<input type="radio" name="selector-mode" value="element" style="margin-right: 5px;">
|
||||
{{ _('Select by element') }}
|
||||
</label>
|
||||
<label>
|
||||
<input type="radio" name="selector-mode" value="draw" checked style="margin-right: 5px;">
|
||||
{{ _('Draw area') }}
|
||||
</label>
|
||||
{{ render_field(form.processor_config_bounding_box) }}
|
||||
{{ render_field(form.processor_config_selection_mode) }}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div id="selector-header">
|
||||
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">{{ _('Clear selection') }}</a>
|
||||
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
|
||||
<i class="fetching-update-notice" style="font-size: 80%;">{{ _('One moment, fetching screenshot and element information..') }}</i>
|
||||
</div>
|
||||
<div id="selector-wrapper" style="display: none">
|
||||
<!-- request the screenshot and get the element offset info ready -->
|
||||
<!-- use img src ready load to know everything is ready to map out -->
|
||||
<!-- @todo: maybe something interesting like a field to select 'elements that contain text... and their parents n' -->
|
||||
<img id="selector-background" >
|
||||
<canvas id="selector-canvas"></canvas>
|
||||
</div>
|
||||
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>{{ _('Currently:') }}</strong> <span class="text">{{ _('Loading...') }}</span></div>
|
||||
{% else %}
|
||||
<strong>{{ _('Visual Selector data is not ready, watch needs to be checked atleast once.') }}</strong>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{# "This functionality needs chrome.." #}
|
||||
{{ only_playwright_type_watches_warning() }}
|
||||
<p>
|
||||
<strong>{{ _('Sorry, this functionality only works with fetchers that support Javascript and screenshots (such as playwright etc).') }}<br>
|
||||
{{ _('You need to') }} <a href="#request">{{ _('Set the fetch method') }}</a> {{ _('to one that supports Javascript and screenshots.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</fieldset>
|
||||
@@ -427,27 +438,27 @@ Math: {{ 1 + 1 }}") }}
|
||||
<table class="pure-table" id="stats-table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>Check count</td>
|
||||
<td>{{ _('Check count') }}</td>
|
||||
<td>{{ "{:,}".format( watch.check_count) }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Consecutive filter failures</td>
|
||||
<td>{{ _('Consecutive filter failures') }}</td>
|
||||
<td>{{ "{:,}".format( watch.consecutive_filter_failures) }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>History length</td>
|
||||
<td>{{ _('History length') }}</td>
|
||||
<td>{{ "{:,}".format(watch.history|length) }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Last fetch duration</td>
|
||||
<td>{{ _('Last fetch duration') }}</td>
|
||||
<td>{{ watch.fetch_time }}s</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Notification alert count</td>
|
||||
<td>{{ _('Notification alert count') }}</td>
|
||||
<td>{{ watch.notification_alert_count }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Server type reply</td>
|
||||
<td>{{ _('Server type reply') }}</td>
|
||||
<td>{{ watch.get('remote_server_reply') }}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
@@ -461,7 +472,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
|
||||
{% if watch.history_n %}
|
||||
<p>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">{{ _('Download latest HTML snapshot') }}</a>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
@@ -471,11 +482,22 @@ Math: {{ 1 + 1 }}") }}
|
||||
<div class="pure-control-group">
|
||||
{{ render_button(form.save_button) }}
|
||||
<a href="{{url_for('ui.form_delete', uuid=uuid)}}"
|
||||
class="pure-button button-error ">Delete</a>
|
||||
class="pure-button button-error"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Delete Watch?') }}"
|
||||
data-confirm-message="<p>{{ _('Are you sure you want to delete the watch for:') }}</p><p><strong>{{ watch.get('url', 'this watch') }}</strong></p><p>{{ _('This action cannot be undone.') }}</p>"
|
||||
data-confirm-button="{{ _('Delete') }}">{{ _('Delete') }}</a>
|
||||
{% if watch.history_n %}<a href="{{url_for('ui.clear_watch_history', uuid=uuid)}}"
|
||||
class="pure-button button-error">Clear History</a>{% endif %}
|
||||
class="pure-button button-error"
|
||||
data-requires-confirm
|
||||
data-confirm-type="warning"
|
||||
data-confirm-title="{{ _('Clear History?') }}"
|
||||
data-confirm-message="<p>{{ _('Are you sure you want to clear all history for:') }}</p><p><strong>{{ watch.get('url', 'this watch') }}</strong></p><p>{{ _('This will remove all snapshots and previous versions. This action cannot be undone.') }}</p>"
|
||||
data-confirm-button="{{ _('Clear History') }}">{{ _('Clear History') }}</a>{% endif %}
|
||||
<a href="{{url_for('ui.form_clone', uuid=uuid)}}"
|
||||
class="pure-button">Clone & Edit</a>
|
||||
class="pure-button">{{ _('Clone & Edit') }}</a>
|
||||
<a href="{{ url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token)}}"><img alt="{{ _('RSS Feed for this watch') }}" style="padding: .5em 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% from '_helpers.html' import highlight_trigger_ignored_explainer %}
|
||||
{% block content %}
|
||||
<script>
|
||||
const screenshot_url = "{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||
const triggered_line_numbers = {{ triggered_line_numbers|tojson }};
|
||||
const triggered_line_numbers = {{ highlight_triggered_line_numbers|tojson }};
|
||||
const ignored_line_numbers = {{ highlight_ignored_line_numbers|tojson }};
|
||||
const blocked_line_numbers = {{ highlight_blocked_line_numbers|tojson }};
|
||||
{% if last_error_screenshot %}
|
||||
const error_screenshot_url = "{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
||||
{% endif %}
|
||||
@@ -14,10 +16,10 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='preview.js') }}" defer></script>
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="settings" style="text-align: center;">
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">Select timestamp</label> <select id="preview-version"
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
class="needs-localtime">
|
||||
{% for version in versions|reverse %}
|
||||
@@ -26,27 +28,27 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<button type="submit" class="pure-button pure-button-primary">Go</button>
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
</form>
|
||||
<br>
|
||||
<strong>Keyboard: </strong><a href="" class="pure-button pure-button-primary" id="btn-previous">
|
||||
← Previous</a> <a class="pure-button pure-button-primary" id="btn-next" href="">
|
||||
→ Next</a>
|
||||
<strong>{{ _('Keyboard:') }} </strong><a href="" class="pure-button pure-button-primary" id="btn-previous">
|
||||
← {{ _('Previous') }}</a> <a class="pure-button pure-button-primary" id="btn-next" href="">
|
||||
→ {{ _('Next') }}</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="tabs">
|
||||
<ul>
|
||||
{% if last_error_text %}
|
||||
<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
|
||||
<li class="tab" id="error-text-tab"><a href="#error-text">{{ _('Error Text') }}</a></li> {% endif %}
|
||||
{% if last_error_screenshot %}
|
||||
<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">Error Screenshot</a>
|
||||
<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">{{ _('Error Screenshot') }}</a>
|
||||
</li> {% endif %}
|
||||
{% if history_n > 0 %}
|
||||
<li class="tab" id="text-tab"><a href="#text">Text</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
|
||||
<li class="tab" id="text-tab"><a href="#text">{{ _('Text') }}</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="#screenshot">{{ _('Current screenshot') }}</a></li>
|
||||
{% endif %}
|
||||
</ul>
|
||||
</div>
|
||||
@@ -54,50 +56,39 @@
|
||||
|
||||
<div id="diff-ui">
|
||||
<div class="tab-pane-inner" id="error-text">
|
||||
<div class="snapshot-age error">{{ watch.error_text_ctime|format_seconds_ago }} seconds ago</div>
|
||||
<div class="snapshot-age error">{{ watch.error_text_ctime|format_seconds_ago }} {{ _('seconds ago') }}</div>
|
||||
<pre>
|
||||
{{ last_error_text }}
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="error-screenshot">
|
||||
<div class="snapshot-age error">{{ watch.snapshot_error_screenshot_ctime|format_seconds_ago }} seconds ago
|
||||
<div class="snapshot-age error">{{ watch.snapshot_error_screenshot_ctime|format_seconds_ago }} {{ _('seconds ago') }}
|
||||
</div>
|
||||
<img id="error-screenshot-img" style="max-width: 80%"
|
||||
alt="Current erroring screenshot from most recent request">
|
||||
alt="{{ _('Current erroring screenshot from most recent request') }}">
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="text">
|
||||
{{ highlight_trigger_ignored_explainer() }}
|
||||
<div class="snapshot-age">{{ current_version|format_timestamp_timeago }}</div>
|
||||
<span class="tip"><strong>Pro-tip</strong>: Highlight text to add to ignore filters</span>
|
||||
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td id="diff-col" class="highlightable-filter">
|
||||
<pre style="border-left: 2px solid #ddd;">
|
||||
{{ content }}
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<pre id="difference" style="border-left: 2px solid #ddd;">{{ content| diff_unescape_difference_spans }}</pre>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="screenshot">
|
||||
<div class="tip">
|
||||
For now, Differences are performed on text, not graphically, only the latest screenshot is available.
|
||||
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
|
||||
</div>
|
||||
<br>
|
||||
{% if is_html_webdriver %}
|
||||
{% if capabilities.supports_screenshots %}
|
||||
{% if screenshot %}
|
||||
<div class="snapshot-age">{{ watch.snapshot_screenshot_ctime|format_timestamp_timeago }}</div>
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request">
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}">
|
||||
{% else %}
|
||||
No screenshot available just yet! Try rechecking the page.
|
||||
{{ _('No screenshot available just yet! Try rechecking the page.') }}
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<strong>Screenshot requires Playwright/WebDriver enabled</strong>
|
||||
<strong>{{ _('Screenshot requires a Content Fetcher ( Sockpuppetbrowser, selenium, etc ) that supports screenshots.') }}</strong>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -1,207 +1,11 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
||||
import os
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
from flask import Blueprint, request, redirect, url_for, flash
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
|
||||
views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
|
||||
|
||||
@views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
content = []
|
||||
versions = []
|
||||
timestamp = None
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
triggered_line_numbers = []
|
||||
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
|
||||
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.args.get('version')
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
timestamp = preferred_version
|
||||
|
||||
try:
|
||||
versions = list(watch.history.keys())
|
||||
content = watch.get_history_snapshot(timestamp)
|
||||
|
||||
triggered_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||
wordlist=watch['trigger_text'],
|
||||
mode='line numbers'
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
|
||||
|
||||
output = render_template("preview.html",
|
||||
content=content,
|
||||
current_version=timestamp,
|
||||
history_n=watch.history_n,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - Diff - {watch.label} @ {timestamp}",
|
||||
triggered_line_numbers=triggered_line_numbers,
|
||||
current_diff_url=watch['url'],
|
||||
screenshot=watch.get_screenshot(),
|
||||
watch=watch,
|
||||
uuid=uuid,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_text=watch.get_error_text(),
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
versions=versions
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@views_blueprint.route("/diff/<string:uuid>", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_build_report(uuid):
|
||||
from changedetectionio import forms
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# For submission of requesting an extract
|
||||
extract_form = forms.extractDataForm(formdata=request.form,
|
||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
||||
)
|
||||
if not extract_form.validate():
|
||||
flash("An error occurred, please see below.", "error")
|
||||
return _render_diff_template(uuid, extract_form)
|
||||
|
||||
else:
|
||||
extract_regex = request.form.get('extract_regex', '').strip()
|
||||
output = watch.extract_regex_from_all_history(extract_regex)
|
||||
if output:
|
||||
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
||||
response.headers['Content-type'] = 'text/csv'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
response.headers['Expires'] = "0"
|
||||
return response
|
||||
|
||||
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
|
||||
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')
|
||||
|
||||
def _render_diff_template(uuid, extract_form=None):
|
||||
"""Helper function to render the diff template with all required data"""
|
||||
from changedetectionio import forms
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Use provided form or create a new one
|
||||
if extract_form is None:
|
||||
extract_form = forms.extractDataForm(formdata=request.form,
|
||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
||||
)
|
||||
|
||||
history = watch.history
|
||||
dates = list(history.keys())
|
||||
|
||||
# If a "from_version" was requested, then find it (or the closest one)
|
||||
# Also set "from version" to be the closest version to the one that was last viewed.
|
||||
|
||||
best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
|
||||
from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
|
||||
from_version = request.args.get('from_version', from_version_timestamp )
|
||||
|
||||
# Use the current one if nothing was specified
|
||||
to_version = request.args.get('to_version', str(dates[-1]))
|
||||
|
||||
try:
|
||||
to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
|
||||
to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
|
||||
|
||||
try:
|
||||
from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
|
||||
from_version_file_contents = f"Unable to read to-version {from_version}.\n"
|
||||
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
|
||||
|
||||
datastore.set_last_viewed(uuid, time.time())
|
||||
|
||||
return render_template("diff.html",
|
||||
current_diff_url=watch['url'],
|
||||
from_version=str(from_version),
|
||||
to_version=str(to_version),
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - Diff - {watch.label}",
|
||||
extract_form=extract_form,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
left_sticky=True,
|
||||
newest=to_version_file_contents,
|
||||
newest_version_timestamp=dates[-1],
|
||||
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
||||
from_version_file_contents=from_version_file_contents,
|
||||
to_version_file_contents=to_version_file_contents,
|
||||
screenshot=screenshot_url,
|
||||
uuid=uuid,
|
||||
versions=dates, # All except current/last
|
||||
watch_a=watch
|
||||
)
|
||||
|
||||
@views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page(uuid):
|
||||
return _render_diff_template(uuid)
|
||||
|
||||
@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
|
||||
@login_optionally_required
|
||||
|
||||
@@ -6,6 +6,7 @@ from flask_login import current_user
|
||||
from flask_paginate import Pagination, get_page_parameter
|
||||
|
||||
from changedetectionio import forms
|
||||
from changedetectionio import processors
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -90,6 +91,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
processor_badge_texts=processors.get_processor_badge_texts(),
|
||||
processor_descriptions=processors.get_processor_descriptions(),
|
||||
processor_badge_css=processors.get_processor_badge_css(),
|
||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||
search_q=request.args.get('q', '').strip(),
|
||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
<script>let nowtimeserver={{ now_time_server }};</script>
|
||||
<script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script>
|
||||
<script>
|
||||
@@ -18,17 +19,20 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
background-repeat: no-repeat;
|
||||
transition: background-size 0.9s ease
|
||||
}
|
||||
|
||||
/* Auto-generated processor badge colors */
|
||||
{{ processor_badge_css|safe }}
|
||||
</style>
|
||||
<div class="box" id="form-quick-watch-add">
|
||||
|
||||
<form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<fieldset>
|
||||
<legend>Add a new web page change detection watch</legend>
|
||||
<legend>{{ _('Add a new web page change detection watch') }}</legend>
|
||||
<div id="watch-add-wrapper-zone">
|
||||
{{ render_nolabel_field(form.url, placeholder="https://...", required=true) }}
|
||||
{{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }}
|
||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
||||
{{ render_nolabel_field(form.watch_submit_button, title=_("Watch this URL!") ) }}
|
||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title=_("Edit first then Watch") ) }}
|
||||
</div>
|
||||
<div id="watch-group-tag">
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }}
|
||||
@@ -38,7 +42,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
</div>
|
||||
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="{{ _('Create a shareable link') }}" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > {{ _("Tip: You can also add 'shared' watches.") }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">{{ _('More info') }}</a></span>
|
||||
</form>
|
||||
</div>
|
||||
<div class="box">
|
||||
@@ -46,24 +50,34 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<input type="hidden" id="op_extradata" name="op_extradata" value="" >
|
||||
<div id="checkbox-operations">
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Pause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnPause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnMute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Recheck</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Tag</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mark viewed</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Use default notification</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear errors</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear/reset history</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Delete</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Pause') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('UnPause') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Mute') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('UnMute') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Recheck') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Tag') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Mark viewed') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Use default notification') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Clear errors') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Clear Histories') }}"
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to clear history for the selected items?</p><p>This action cannot be undone.</p>') }}"
|
||||
data-confirm-button="{{ _('OK') }}"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Clear/reset history') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Delete Watches?') }}"
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to delete the selected watches?</strong></p><p>This action cannot be undone.</p>') }}"
|
||||
data-confirm-button="{{ _('Delete') }}"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Delete') }}</button>
|
||||
</div>
|
||||
{%- if watches|length >= pagination.per_page -%}
|
||||
{{ pagination.info }}
|
||||
{%- endif -%}
|
||||
{%- if search_q -%}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
|
||||
{%- if search_q -%}<div id="search-result-info">{{ _('Searching') }} "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
|
||||
<div>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">{{ _('All') }}</a>
|
||||
|
||||
<!-- tag list -->
|
||||
{%- for uuid, tag in tags -%}
|
||||
@@ -101,19 +115,19 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
|
||||
<a class="{{ 'active '+link_order if sort_attribute == 'notification_muted' else 'inactive' }}" href="{{url_for('watchlist.index', sort='notification_muted', order=link_order, tag=active_tag_uuid)}}"><i data-feather="volume-2" style="vertical-align: bottom; width: 14px; height: 14px; margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
|
||||
</th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">{{ _('Website') }} <span class='arrow {{link_order}}'></span></a></th>
|
||||
{%- if any_has_restock_price_processor -%}
|
||||
<th>Restock & Price</th>
|
||||
<th>{{ _('Restock & Price') }}</th>
|
||||
{%- endif -%}
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">{{ _('Last') }}</span> {{ _('Checked') }} <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">{{ _('Last') }}</span> {{ _('Changed') }} <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th class="empty-cell"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{%- if not watches|length -%}
|
||||
<tr>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">{{ _('No website watches configured, please add a URL in the box above, or') }} <a href="{{ url_for('imports.import_page')}}" >{{ _('import a list') }}</a>.</td>
|
||||
</tr>
|
||||
{%- endif -%}
|
||||
|
||||
@@ -167,14 +181,14 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if watch['processor'] == 'restock_diff' -%}
|
||||
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||
<span class="watch-tag-list">{{ watch_tag.title }}</span>
|
||||
@@ -182,11 +196,9 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||
{%- if watch.get_fetch_backend == "html_webdriver"
|
||||
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
||||
or "extra_browser_" in watch.get_fetch_backend
|
||||
-%}
|
||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
||||
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
|
||||
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
|
||||
{{ effective_fetcher|fetcher_status_icons }}
|
||||
{%- endif -%}
|
||||
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
|
||||
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
|
||||
@@ -198,20 +210,20 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
<td class="restock-and-price">
|
||||
{%- if watch['processor'] == 'restock_diff' -%}
|
||||
{%- if watch.has_restock_info -%}
|
||||
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
|
||||
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="{{ _('Detecting restock and price') }}">
|
||||
<!-- maybe some object watch['processor'][restock_diff] or.. -->
|
||||
{%- if watch['restock']['in_stock']-%} In stock {%- else-%} Not in stock {%- endif -%}
|
||||
{%- if watch['restock']['in_stock']-%} {{ _('In stock') }} {%- else-%} {{ _('Not in stock') }} {%- endif -%}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
|
||||
{%- if watch['restock']['price'] != None -%}
|
||||
<span class="restock-label price" title="Price">
|
||||
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">No information</span>
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
</td>
|
||||
@@ -219,24 +231,24 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
{#last_checked becomes fetch-start-time#}
|
||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
|
||||
<div class="spinner-wrapper" style="display:none;" >
|
||||
<span class="spinner"></span><span> Checking now</span>
|
||||
<span class="spinner"></span><span class="status-text"> {{ _('Checking now') }}</span>
|
||||
</div>
|
||||
<span class="innertext">{{watch|format_last_checked_time|safe}}</span>
|
||||
</td>
|
||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
|
||||
{{watch.last_changed|format_timestamp_timeago}}
|
||||
{%- else -%}
|
||||
Not yet
|
||||
{{ _('Not yet') }}
|
||||
{%- endif -%}
|
||||
</td>
|
||||
<td class="buttons">
|
||||
<div>
|
||||
{%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%}
|
||||
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a>
|
||||
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
|
||||
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
|
||||
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
|
||||
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">{{ _('Queued') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">{{ _('Recheck') }}</a>
|
||||
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">{{ _('History') }}</a>
|
||||
<a href="{{ url_for('ui.ui_preview.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">{{ _('Preview') }}</a>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -245,22 +257,21 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
</table>
|
||||
<ul id="post-list-buttons">
|
||||
<li id="post-list-with-errors" style="display: none;" >
|
||||
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
|
||||
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">{{ _('With errors') }} ({{ errored_count }})</a>
|
||||
</li>
|
||||
<li id="post-list-mark-views" style="display: none;" >
|
||||
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
|
||||
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">{{ _('Mark all viewed') }}</a>
|
||||
</li>
|
||||
{%- if active_tag_uuid -%}
|
||||
<li id="post-list-mark-views-tag">
|
||||
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
|
||||
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">{{ _("Mark all viewed in '%(title)s'", title=active_tag.title) }}</a>
|
||||
</li>
|
||||
{%- endif -%}
|
||||
<li id="post-list-unread" style="display: none;" >
|
||||
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
|
||||
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">{{ _('Unread') }} (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
|
||||
all {% if active_tag_uuid %} in '{{active_tag.title}}'{%endif%}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">{{ _('Recheck all') }} {% if active_tag_uuid %} {{ _("in '%(title)s'", title=active_tag.title) }}{%endif%}</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
"""
|
||||
Levenshtein distance and similarity plugin for text change detection.
|
||||
Provides metrics for measuring text similarity between snapshots.
|
||||
"""
|
||||
import pluggy
|
||||
from loguru import logger
|
||||
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
"""
|
||||
Word count plugin for content analysis.
|
||||
Provides word count metrics for snapshot content.
|
||||
"""
|
||||
import pluggy
|
||||
from loguru import logger
|
||||
|
||||
@@ -14,7 +18,7 @@ def count_words_in_history(watch, incoming_text=None):
|
||||
elif watch.history.keys():
|
||||
# When called from UI extras to count latest snapshot
|
||||
latest_key = list(watch.history.keys())[-1]
|
||||
latest_content = watch.get_history_snapshot(latest_key)
|
||||
latest_content = watch.get_history_snapshot(timestamp=latest_key)
|
||||
return len(latest_content.split())
|
||||
return 0
|
||||
except Exception as e:
|
||||
|
||||
@@ -7,6 +7,9 @@ import os
|
||||
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
|
||||
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
|
||||
|
||||
# Import hookimpl from centralized pluggy interface
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
|
||||
SCREENSHOT_DEFAULT_QUALITY = 40
|
||||
|
||||
@@ -18,7 +21,9 @@ SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_
|
||||
# The size at which we will switch to stitching method, when below this (and
|
||||
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
|
||||
# screenshot method.
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
|
||||
# Increased from 8000 to 10000 for better performance (fewer chunks = faster)
|
||||
# Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000))
|
||||
|
||||
# available_fetchers() will scan this implementation looking for anything starting with html_
|
||||
# this information is used in the form selections
|
||||
@@ -35,17 +40,54 @@ def available_fetchers():
|
||||
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
||||
import inspect
|
||||
p = []
|
||||
|
||||
# Get built-in fetchers (but skip plugin fetchers that were added via setattr)
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
|
||||
if inspect.isclass(obj):
|
||||
# @todo html_ is maybe better as fetcher_ or something
|
||||
# In this case, make sure to edit the default one in store.py and fetch_site_status.py
|
||||
if name.startswith('html_'):
|
||||
t = tuple([name, obj.fetcher_description])
|
||||
p.append(t)
|
||||
# Skip plugin fetchers that were already registered
|
||||
if name not in _plugin_fetchers:
|
||||
t = tuple([name, obj.fetcher_description])
|
||||
p.append(t)
|
||||
|
||||
# Get plugin fetchers from cache (already loaded at module init)
|
||||
for name, fetcher_class in _plugin_fetchers.items():
|
||||
if hasattr(fetcher_class, 'fetcher_description'):
|
||||
t = tuple([name, fetcher_class.fetcher_description])
|
||||
p.append(t)
|
||||
else:
|
||||
logger.warning(f"Plugin fetcher '{name}' does not have fetcher_description attribute")
|
||||
|
||||
return p
|
||||
|
||||
|
||||
def get_plugin_fetchers():
|
||||
"""Load and return all plugin fetchers from the centralized plugin manager."""
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
|
||||
fetchers = {}
|
||||
try:
|
||||
# Call the register_content_fetcher hook from all registered plugins
|
||||
results = plugin_manager.hook.register_content_fetcher()
|
||||
for result in results:
|
||||
if result:
|
||||
name, fetcher_class = result
|
||||
fetchers[name] = fetcher_class
|
||||
# Register in current module so hasattr() checks work
|
||||
setattr(sys.modules[__name__], name, fetcher_class)
|
||||
logger.info(f"Registered plugin fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', 'No description')}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading plugin fetchers: {e}")
|
||||
|
||||
return fetchers
|
||||
|
||||
|
||||
# Initialize plugins at module load time
|
||||
_plugin_fetchers = get_plugin_fetchers()
|
||||
|
||||
|
||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
|
||||
# rather than site-specific.
|
||||
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||
@@ -62,3 +104,8 @@ else:
|
||||
logger.debug("Falling back to selenium as fetcher")
|
||||
from .webdriver_selenium import fetcher as html_webdriver
|
||||
|
||||
|
||||
# Register built-in fetchers as plugins after all imports are complete
|
||||
from changedetectionio.pluggy_interface import register_builtin_fetchers
|
||||
register_builtin_fetchers()
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ class Fetcher():
|
||||
favicon_blob = None
|
||||
instock_data = None
|
||||
instock_data_js = ""
|
||||
screenshot_format = None
|
||||
status_code = None
|
||||
webdriver_js_execute_code = None
|
||||
xpath_data = None
|
||||
@@ -64,6 +65,35 @@ class Fetcher():
|
||||
# Time ONTOP of the system defined env minimum time
|
||||
render_extract_delay = 0
|
||||
|
||||
# Fetcher capability flags - subclasses should override these
|
||||
# These indicate what features the fetcher supports
|
||||
supports_browser_steps = False # Can execute browser automation steps
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return data for status icon to display in the watch overview.
|
||||
|
||||
This method can be overridden by subclasses to provide custom status icons.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with icon data:
|
||||
{
|
||||
'filename': 'icon-name.svg', # Icon filename
|
||||
'alt': 'Alt text', # Alt attribute
|
||||
'title': 'Tooltip text', # Title attribute
|
||||
'style': 'height: 1em;' # Optional inline CSS
|
||||
}
|
||||
Or None if no icon
|
||||
"""
|
||||
return None
|
||||
|
||||
def clear_content(self):
|
||||
"""
|
||||
Explicitly clear all content from memory to free up heap space.
|
||||
@@ -75,7 +105,6 @@ class Fetcher():
|
||||
self.screenshot = None
|
||||
self.xpath_data = None
|
||||
# Keep headers and status_code as they're small
|
||||
logger.trace("Fetcher content cleared from memory")
|
||||
|
||||
@abstractmethod
|
||||
def get_error(self):
|
||||
@@ -93,12 +122,13 @@ class Fetcher():
|
||||
request_method=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
# Should set self.error, self.status_code and self.content
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def quit(self, watch=None):
|
||||
async def quit(self, watch=None):
|
||||
return
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -9,10 +9,10 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
async def capture_full_page_async(page):
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
from multiprocessing import Process, Pipe
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
|
||||
@@ -26,8 +26,20 @@ async def capture_full_page_async(page):
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
@@ -35,36 +47,68 @@ async def capture_full_page_async(page):
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# Only scroll if not at the top (y > 0)
|
||||
if y > 0:
|
||||
await page.evaluate(f"window.scrollTo(0, {y})")
|
||||
|
||||
# Request GC only before screenshot (not 3x per chunk)
|
||||
await page.request_gc()
|
||||
await page.evaluate(f"window.scrollTo(0, {y})")
|
||||
await page.request_gc()
|
||||
screenshot_chunks.append(await page.screenshot(
|
||||
type="jpeg",
|
||||
full_page=False,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
))
|
||||
|
||||
screenshot_kwargs = {
|
||||
'type': screenshot_type,
|
||||
'full_page': False
|
||||
}
|
||||
# Only pass quality parameter for jpeg (PNG doesn't support it in Playwright)
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
y += step_size
|
||||
await page.request_gc()
|
||||
|
||||
# Restore original viewport size
|
||||
await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
|
||||
# Unlock element dimensions if they were locked
|
||||
if elements_locked:
|
||||
unlock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'unlock-elements-sizing.js')
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
|
||||
# If we have multiple chunks, stitch them together
|
||||
if len(screenshot_chunks) > 1:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
parent_conn, child_conn = Pipe()
|
||||
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
|
||||
# Only use separate process for many chunks (4+) to avoid blocking the event loop
|
||||
if len(screenshot_chunks) <= 3:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
|
||||
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
else:
|
||||
# Use separate process for many chunks to avoid blocking
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
# Explicit cleanup
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
# Explicit cleanup
|
||||
del screenshot_chunks
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
screenshot_chunks = None
|
||||
return screenshot
|
||||
|
||||
@@ -89,8 +133,22 @@ class fetcher(Fetcher):
|
||||
|
||||
proxy = None
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
# Capability flags
|
||||
supports_browser_steps = True
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for Playwright fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
||||
|
||||
@@ -125,7 +183,7 @@ class fetcher(Fetcher):
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
screenshot = await capture_full_page_async(page=self.page)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
|
||||
if self.browser_steps_screenshot_path is not None:
|
||||
@@ -139,7 +197,7 @@ class fetcher(Fetcher):
|
||||
content = await self.page.content()
|
||||
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
|
||||
logger.debug(f"Saving step HTML to {destination}")
|
||||
with open(destination, 'w') as f:
|
||||
with open(destination, 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
|
||||
async def run(self,
|
||||
@@ -151,8 +209,10 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
@@ -244,7 +304,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page)
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
@@ -293,7 +353,7 @@ class fetcher(Fetcher):
|
||||
# acceptable screenshot quality here
|
||||
try:
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page)
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
@@ -330,4 +390,17 @@ class fetcher(Fetcher):
|
||||
browser = None
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PlaywrightFetcherPlugin:
|
||||
"""Plugin class that registers the Playwright fetcher as a built-in plugin."""
|
||||
|
||||
def register_content_fetcher(self):
|
||||
"""Register the Playwright fetcher"""
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
playwright_plugin = PlaywrightFetcherPlugin()
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -20,10 +20,10 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page):
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
from multiprocessing import Process, Pipe
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
|
||||
@@ -41,11 +41,25 @@ async def capture_full_page(page):
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
if page_height > page.viewport['height']:
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page() changes viewport height which triggers @media (min-height) rules
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
@@ -60,18 +74,34 @@ async def capture_full_page(page):
|
||||
y
|
||||
)
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(type_='jpeg',
|
||||
fullPage=False,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
|
||||
screenshot_kwargs = {
|
||||
'type_': screenshot_type,
|
||||
'fullPage': False
|
||||
}
|
||||
# PNG doesn't support quality parameter in Puppeteer
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
|
||||
# Unlock element dimensions if they were locked
|
||||
if elements_locked:
|
||||
unlock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'unlock-elements-sizing.js')
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
parent_conn, child_conn = Pipe()
|
||||
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
@@ -93,13 +123,27 @@ class fetcher(Fetcher):
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
|
||||
|
||||
browser = None
|
||||
browser_type = ''
|
||||
command_executor = ''
|
||||
|
||||
proxy = None
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
# Capability flags
|
||||
supports_browser_steps = True
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for Puppeteer fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if custom_browser_connection_url:
|
||||
self.browser_connection_is_custom = True
|
||||
@@ -128,21 +172,20 @@ class fetcher(Fetcher):
|
||||
proxy_url += f"{parsed.hostname}{port}{parsed.path}{q}"
|
||||
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
||||
|
||||
# def screenshot_step(self, step_n=''):
|
||||
# screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
|
||||
#
|
||||
# if self.browser_steps_screenshot_path is not None:
|
||||
# destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
|
||||
# logger.debug(f"Saving step screenshot to {destination}")
|
||||
# with open(destination, 'wb') as f:
|
||||
# f.write(screenshot)
|
||||
#
|
||||
# def save_step_html(self, step_n):
|
||||
# content = self.page.content()
|
||||
# destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
|
||||
# logger.debug(f"Saving step HTML to {destination}")
|
||||
# with open(destination, 'w') as f:
|
||||
# f.write(content)
|
||||
async def quit(self, watch=None):
|
||||
try:
|
||||
await self.page.close()
|
||||
del self.page
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
try:
|
||||
await self.browser.close()
|
||||
del self.browser
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
logger.info("Cleanup puppeteer complete.")
|
||||
|
||||
async def fetch_page(self,
|
||||
current_include_filters,
|
||||
@@ -153,8 +196,10 @@ class fetcher(Fetcher):
|
||||
request_body,
|
||||
request_headers,
|
||||
request_method,
|
||||
screenshot_format,
|
||||
timeout,
|
||||
url,
|
||||
watch_uuid
|
||||
):
|
||||
import re
|
||||
self.delete_browser_steps_screenshots()
|
||||
@@ -170,9 +215,9 @@ class fetcher(Fetcher):
|
||||
# Connect directly using the specified browser_ws_endpoint
|
||||
# @todo timeout
|
||||
try:
|
||||
browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
except websockets.exceptions.InvalidStatusCode as e:
|
||||
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
||||
except websockets.exceptions.InvalidURI:
|
||||
@@ -181,7 +226,7 @@ class fetcher(Fetcher):
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||
|
||||
# more reliable is to just request a new page
|
||||
self.page = await browser.newPage()
|
||||
self.page = await self.browser.newPage()
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||
@@ -196,7 +241,6 @@ class fetcher(Fetcher):
|
||||
"height": int(match.group(2))
|
||||
})
|
||||
logger.debug(f"Puppeteer viewport size {self.page.viewport}")
|
||||
|
||||
try:
|
||||
from pyppeteerstealth import inject_evasions_into_page
|
||||
except ImportError:
|
||||
@@ -241,33 +285,47 @@ class fetcher(Fetcher):
|
||||
# browsersteps_interface = steppable_browser_interface()
|
||||
# browsersteps_interface.page = self.page
|
||||
|
||||
async def handle_frame_navigation(event):
|
||||
logger.debug(f"Frame navigated: {event}")
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
# Enable Network domain to detect when first bytes arrive
|
||||
await self.page._client.send('Network.enable')
|
||||
|
||||
self.page._client.on('Page.frameStartedNavigating', lambda event: asyncio.create_task(handle_frame_navigation(event)))
|
||||
self.page._client.on('Page.frameStartedLoading', lambda event: asyncio.create_task(handle_frame_navigation(event)))
|
||||
self.page._client.on('Page.frameStoppedLoading', lambda event: logger.debug(f"Frame stopped loading: {event}"))
|
||||
async def setup_frame_handlers_on_first_response(event):
|
||||
# Only trigger for the main document response
|
||||
if event.get('type') == 'Document':
|
||||
logger.debug("First response received, setting up frame handlers for forced page stop load.")
|
||||
|
||||
# De-register this listener - we only need it once
|
||||
self.page._client.remove_listener('Network.responseReceived', setup_frame_handlers_on_first_response)
|
||||
|
||||
# Now set up the frame navigation handlers
|
||||
async def handle_frame_navigation(event):
|
||||
# Wait n seconds after the frameStartedLoading, not from any frameStartedLoading/frameStartedNavigating
|
||||
logger.debug(f"Frame navigated: {event}")
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
|
||||
self.page._client.on('Page.frameStartedNavigating', lambda e: asyncio.create_task(handle_frame_navigation(e)))
|
||||
self.page._client.on('Page.frameStartedLoading', lambda e: asyncio.create_task(handle_frame_navigation(e)))
|
||||
self.page._client.on('Page.frameStoppedLoading', lambda e: logger.debug(f"Frame stopped loading: {e}"))
|
||||
|
||||
# Listen for first response to trigger frame handler setup
|
||||
self.page._client.on('Network.responseReceived', setup_frame_handlers_on_first_response)
|
||||
|
||||
response = None
|
||||
attempt=0
|
||||
while not response:
|
||||
logger.debug(f"Attempting page fetch {url} attempt {attempt}")
|
||||
response = await self.page.goto(url)
|
||||
response = await self.page.goto(url, timeout=0)
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
if response:
|
||||
break
|
||||
if not response:
|
||||
logger.warning("Page did not fetch! trying again!")
|
||||
if response is None and attempt>=2:
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attmpt {attempt}")
|
||||
logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attempt {attempt}")
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
attempt+=1
|
||||
|
||||
@@ -279,8 +337,6 @@ class fetcher(Fetcher):
|
||||
except Exception as e:
|
||||
logger.warning("Got exception when running evaluate on custom JS code")
|
||||
logger.error(str(e))
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
# This can be ok, we will try to grab what we could retrieve
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
@@ -290,8 +346,6 @@ class fetcher(Fetcher):
|
||||
# https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
|
||||
logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
|
||||
logger.critical(response)
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
if fetch_favicon:
|
||||
@@ -301,7 +355,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -309,8 +363,6 @@ class fetcher(Fetcher):
|
||||
|
||||
if not empty_pages_are_a_change and len(content.strip()) == 0:
|
||||
logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
raise EmptyReply(url=url, status_code=response.status)
|
||||
|
||||
# Run Browser Steps here
|
||||
@@ -328,6 +380,12 @@ class fetcher(Fetcher):
|
||||
await self.page.evaluate(f"var include_filters=''")
|
||||
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
|
||||
self.content = await self.page.content
|
||||
|
||||
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
@@ -335,17 +393,10 @@ class fetcher(Fetcher):
|
||||
if not self.xpath_data:
|
||||
raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
|
||||
|
||||
|
||||
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
||||
|
||||
self.content = await self.page.content
|
||||
|
||||
self.screenshot = await capture_full_page(page=self.page)
|
||||
|
||||
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
|
||||
logger.success(f"Fetching '{url}' complete, closing page")
|
||||
await self.page.close()
|
||||
logger.success(f"Fetching '{url}' complete, closing browser")
|
||||
await browser.close()
|
||||
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
|
||||
|
||||
async def main(self, **kwargs):
|
||||
@@ -360,8 +411,10 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
|
||||
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
|
||||
@@ -378,9 +431,24 @@ class fetcher(Fetcher):
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
screenshot_format=None,
|
||||
timeout=timeout,
|
||||
url=url,
|
||||
watch_uuid=watch_uuid,
|
||||
), timeout=max_time
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PuppeteerFetcherPlugin:
|
||||
"""Plugin class that registers the Puppeteer fetcher as a built-in plugin."""
|
||||
|
||||
def register_content_fetcher(self):
|
||||
"""Register the Puppeteer fetcher"""
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
puppeteer_plugin = PuppeteerFetcherPlugin()
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import asyncio
|
||||
from functools import partial
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
@@ -11,8 +13,8 @@ from changedetectionio.content_fetchers.base import Fetcher
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Basic fast Plaintext/HTTP Client"
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.proxy_override = proxy_override
|
||||
# browser_connection_url is none because its always 'launched locally'
|
||||
|
||||
@@ -25,7 +27,9 @@ class fetcher(Fetcher):
|
||||
ignore_status_codes=False,
|
||||
current_include_filters=None,
|
||||
is_binary=False,
|
||||
empty_pages_are_a_change=False):
|
||||
empty_pages_are_a_change=False,
|
||||
watch_uuid=None,
|
||||
):
|
||||
"""Synchronous version of run - the original requests implementation"""
|
||||
|
||||
import chardet
|
||||
@@ -76,9 +80,22 @@ class fetcher(Fetcher):
|
||||
if not is_binary:
|
||||
# Don't run this for PDF (and requests identified as binary) takes a _long_ time
|
||||
if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# For XML/RSS feeds, check the XML declaration for encoding attribute
|
||||
# This is more reliable than chardet which can misdetect UTF-8 as MacRoman
|
||||
content_type = r.headers.get('content-type', '').lower()
|
||||
if 'xml' in content_type or 'rss' in content_type:
|
||||
# Look for <?xml version="1.0" encoding="UTF-8"?>
|
||||
xml_encoding_match = re.search(rb'<\?xml[^>]+encoding=["\']([^"\']+)["\']', r.content[:200])
|
||||
if xml_encoding_match:
|
||||
r.encoding = xml_encoding_match.group(1).decode('ascii')
|
||||
else:
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
@@ -104,6 +121,12 @@ class fetcher(Fetcher):
|
||||
|
||||
self.raw_content = r.content
|
||||
|
||||
# If the content is an image, set it as screenshot for SSIM/visual comparison
|
||||
content_type = r.headers.get('content-type', '').lower()
|
||||
if 'image/' in content_type:
|
||||
self.screenshot = r.content
|
||||
logger.debug(f"Image content detected ({content_type}), set as screenshot for comparison")
|
||||
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
current_include_filters=None,
|
||||
@@ -113,8 +136,10 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
"""Async wrapper that runs the synchronous requests code in a thread pool"""
|
||||
|
||||
@@ -132,11 +157,12 @@ class fetcher(Fetcher):
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
current_include_filters=current_include_filters,
|
||||
is_binary=is_binary,
|
||||
empty_pages_are_a_change=empty_pages_are_a_change
|
||||
empty_pages_are_a_change=empty_pages_are_a_change,
|
||||
watch_uuid=watch_uuid,
|
||||
)
|
||||
)
|
||||
|
||||
def quit(self, watch=None):
|
||||
async def quit(self, watch=None):
|
||||
|
||||
# In case they switched to `requests` fetcher from something else
|
||||
# Then the screenshot could be old, in any case, it's not used here.
|
||||
@@ -149,3 +175,15 @@ class fetcher(Fetcher):
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class RequestsFetcherPlugin:
|
||||
"""Plugin class that registers the requests fetcher as a built-in plugin."""
|
||||
|
||||
def register_content_fetcher(self):
|
||||
"""Register the requests fetcher"""
|
||||
return ('html_requests', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
requests_plugin = RequestsFetcherPlugin()
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
/**
|
||||
* Lock Element Dimensions for Screenshot Capture
|
||||
*
|
||||
* THE PROBLEM:
|
||||
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
|
||||
* 1. Temporarily change the viewport height to a large value (e.g., 800px → 3809px)
|
||||
* 2. Take screenshots in chunks while scrolling
|
||||
* 3. Stitch the chunks together
|
||||
*
|
||||
* However, changing the viewport height triggers CSS media queries like:
|
||||
* @media (min-height: 860px) { .ad { height: 250px; } }
|
||||
*
|
||||
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
|
||||
* - Screenshot shows element at NEW size (after media query triggered)
|
||||
* - xpath element coordinates measured at OLD size (before viewport change)
|
||||
* - Visual selector overlays don't align with screenshot
|
||||
*
|
||||
* EXAMPLE BUG:
|
||||
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
|
||||
* - Viewport changes to 1280x3809 for screenshot
|
||||
* - Media query triggers: ad expands to 250px
|
||||
* - All content below shifts down by 112px (250-138)
|
||||
* - Article now at position: 391px (279+112)
|
||||
* - But xpath data says 279px → 112px mismatch! ✗
|
||||
*
|
||||
* THE SOLUTION:
|
||||
* Before changing viewport, lock ALL element dimensions with !important inline styles.
|
||||
* Inline styles with !important override media query CSS, preventing layout changes.
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Iterates through every element on the page
|
||||
* 2. Captures current computed dimensions (width, height)
|
||||
* 3. Sets inline styles with !important to freeze those dimensions
|
||||
* 4. Disables ResizeObserver API (for JS-based resizing)
|
||||
* 5. When viewport changes for screenshot, media queries can't resize anything
|
||||
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
|
||||
* The page must be fully loaded and settled at its initial viewport size.
|
||||
* No need to restore state afterward - page is closed after screenshot.
|
||||
*
|
||||
* PERFORMANCE:
|
||||
* - Iterates all DOM elements (can be 1000s on complex pages)
|
||||
* - Typically completes in 50-200ms
|
||||
* - One-time cost before screenshot, well worth it for coordinate accuracy
|
||||
*
|
||||
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
|
||||
*/
|
||||
|
||||
(() => {
|
||||
// Store original styles in a global WeakMap for later restoration
|
||||
window.__elementSizingRestore = new WeakMap();
|
||||
|
||||
// Lock ALL element dimensions to prevent media query layout changes
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rect = el.getBoundingClientRect();
|
||||
|
||||
// Save original inline style values BEFORE locking
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
const originalStyles = {};
|
||||
properties.forEach(prop => {
|
||||
originalStyles[prop] = {
|
||||
value: el.style.getPropertyValue(prop),
|
||||
priority: el.style.getPropertyPriority(prop)
|
||||
};
|
||||
});
|
||||
window.__elementSizingRestore.set(el, originalStyles);
|
||||
|
||||
// Lock dimensions with !important to override media queries
|
||||
if (rect.height > 0) {
|
||||
el.style.setProperty('height', computed.height, 'important');
|
||||
el.style.setProperty('min-height', computed.height, 'important');
|
||||
el.style.setProperty('max-height', computed.height, 'important');
|
||||
}
|
||||
if (rect.width > 0) {
|
||||
el.style.setProperty('width', computed.width, 'important');
|
||||
el.style.setProperty('min-width', computed.width, 'important');
|
||||
el.style.setProperty('max-width', computed.width, 'important');
|
||||
}
|
||||
});
|
||||
|
||||
// Also disable ResizeObserver for JS-based resizing
|
||||
window.ResizeObserver = class {
|
||||
constructor() {}
|
||||
observe() {}
|
||||
unobserve() {}
|
||||
disconnect() {}
|
||||
};
|
||||
|
||||
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
|
||||
})();
|
||||
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Unlock Element Dimensions After Screenshot Capture
|
||||
*
|
||||
* This script removes the inline !important styles that were applied by lock-elements-sizing.js
|
||||
* and restores elements to their original state using the WeakMap created during locking.
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script AFTER completing screenshot capture and restoring the viewport.
|
||||
* This allows the page to return to its normal responsive behavior.
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Iterates through every element that was locked
|
||||
* 2. Reads original style values from the global WeakMap
|
||||
* 3. Restores original inline styles (or removes them if they weren't set originally)
|
||||
* 4. Cleans up the WeakMap
|
||||
*
|
||||
* @see lock-elements-sizing.js for the locking mechanism
|
||||
*/
|
||||
|
||||
(() => {
|
||||
// Check if the restore map exists
|
||||
if (!window.__elementSizingRestore) {
|
||||
console.log('⚠ Element sizing restore map not found - elements may not have been locked');
|
||||
return;
|
||||
}
|
||||
|
||||
// Restore all locked dimension styles to their original state
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const originalStyles = window.__elementSizingRestore.get(el);
|
||||
|
||||
if (originalStyles) {
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
|
||||
properties.forEach(prop => {
|
||||
const original = originalStyles[prop];
|
||||
|
||||
if (original.value) {
|
||||
// Restore original value with original priority
|
||||
el.style.setProperty(prop, original.value, original.priority || '');
|
||||
} else {
|
||||
// Was not set originally, so remove it
|
||||
el.style.removeProperty(prop);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Clean up the global WeakMap
|
||||
delete window.__elementSizingRestore;
|
||||
|
||||
console.log('✓ Element dimensions unlocked - page restored to original state');
|
||||
})();
|
||||
@@ -8,14 +8,90 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||
|
||||
# Cache font to avoid loading on every stitch
|
||||
_cached_font = None
|
||||
|
||||
def _get_caption_font():
|
||||
"""Get or create cached font for caption text."""
|
||||
global _cached_font
|
||||
if _cached_font is None:
|
||||
from PIL import ImageFont
|
||||
try:
|
||||
_cached_font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
_cached_font = ImageFont.load_default()
|
||||
return _cached_font
|
||||
|
||||
|
||||
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together inline (no multiprocessing).
|
||||
Optimized for small number of chunks (2-3) to avoid process creation overhead.
|
||||
|
||||
Args:
|
||||
chunks_bytes: List of JPEG image bytes
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
|
||||
Returns:
|
||||
bytes: Stitched JPEG image
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
# Create stitched image
|
||||
stitched = Image.new('RGB', (max_width, total_height))
|
||||
y_offset = 0
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font = _get_caption_font()
|
||||
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode to JPEG
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result = output.getvalue()
|
||||
|
||||
# Cleanup
|
||||
stitched.close()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
Used for large number of chunks (4+) to avoid blocking the main event loop.
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
try:
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
@@ -27,21 +103,17 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption on top (overlaid, not extending canvas)
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font_size = 35
|
||||
font_color = (255, 0, 0)
|
||||
background_color = (255, 255, 255)
|
||||
|
||||
|
||||
# Try to load a proper font
|
||||
# Try to load font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", font_size)
|
||||
font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
font = ImageFont.load_default()
|
||||
|
||||
@@ -49,19 +121,16 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white rectangle background behind text
|
||||
rect_top = 0
|
||||
rect_bottom = text_height + 2 * padding
|
||||
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered horizontally, 10px padding from top of the rectangle
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
text_y = padding
|
||||
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode and send image
|
||||
# Encode and send image with optimization
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
pipe_conn.send_bytes(output.getvalue())
|
||||
|
||||
stitched.close()
|
||||
|
||||
@@ -14,8 +14,22 @@ class fetcher(Fetcher):
|
||||
proxy = None
|
||||
proxy_url = None
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
# Capability flags
|
||||
supports_browser_steps = True
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for WebDriver fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
from urllib.parse import urlparse
|
||||
from selenium.webdriver.common.proxy import Proxy
|
||||
|
||||
@@ -55,8 +69,10 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
|
||||
import asyncio
|
||||
@@ -131,7 +147,21 @@ class fetcher(Fetcher):
|
||||
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
||||
self.content = driver.page_source
|
||||
self.headers = {}
|
||||
self.screenshot = driver.get_screenshot_as_png()
|
||||
|
||||
# Selenium always captures as PNG, convert to JPEG if needed
|
||||
screenshot_png = driver.get_screenshot_as_png()
|
||||
|
||||
# Convert to JPEG if requested (for smaller file size)
|
||||
if self.screenshot_format and self.screenshot_format.upper() == 'JPEG':
|
||||
from PIL import Image
|
||||
import io
|
||||
img = Image.open(io.BytesIO(screenshot_png))
|
||||
jpeg_buffer = io.BytesIO()
|
||||
img.save(jpeg_buffer, format='JPEG', quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||
self.screenshot = jpeg_buffer.getvalue()
|
||||
img.close()
|
||||
else:
|
||||
self.screenshot = screenshot_png
|
||||
except Exception as e:
|
||||
driver.quit()
|
||||
raise e
|
||||
@@ -141,3 +171,16 @@ class fetcher(Fetcher):
|
||||
# Run the selenium operations in a thread pool to avoid blocking the event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(None, _run_sync)
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class WebDriverSeleniumFetcherPlugin:
|
||||
"""Plugin class that registers the WebDriver Selenium fetcher as a built-in plugin."""
|
||||
|
||||
def register_content_fetcher(self):
|
||||
"""Register the WebDriver Selenium fetcher"""
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
webdriver_selenium_plugin = WebDriverSeleniumFetcherPlugin()
|
||||
|
||||
@@ -1,119 +0,0 @@
|
||||
import difflib
|
||||
from typing import List, Iterator, Union
|
||||
|
||||
HTML_REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
|
||||
HTML_ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
|
||||
|
||||
# These get set to html or telegram type or discord compatible or whatever in handler.py
|
||||
REMOVED_PLACEMARKER_OPEN = '<<<removed_PLACEMARKER_OPEN'
|
||||
REMOVED_PLACEMARKER_CLOSED = '<<<removed_PLACEMARKER_CLOSED'
|
||||
|
||||
ADDED_PLACEMARKER_OPEN = '<<<added_PLACEMARKER_OPEN'
|
||||
ADDED_PLACEMARKER_CLOSED = '<<<added_PLACEMARKER_CLOSED'
|
||||
|
||||
CHANGED_PLACEMARKER_OPEN = '<<<changed_PLACEMARKER_OPEN'
|
||||
CHANGED_PLACEMARKER_CLOSED = '<<<changed_PLACEMARKER_CLOSED'
|
||||
|
||||
CHANGED_INTO_PLACEMARKER_OPEN = '<<<changed_into_PLACEMARKER_OPEN'
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED = '<<<changed_into_PLACEMARKER_CLOSED'
|
||||
|
||||
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
|
||||
"""Return a slice of the list, or a single element if start == end."""
|
||||
return lst[start:end] if start != end else [lst[start]]
|
||||
|
||||
def customSequenceMatcher(
|
||||
before: List[str],
|
||||
after: List[str],
|
||||
include_equal: bool = False,
|
||||
include_removed: bool = True,
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
include_change_type_prefix: bool = True
|
||||
) -> Iterator[List[str]]:
|
||||
"""
|
||||
Compare two sequences and yield differences based on specified parameters.
|
||||
|
||||
Args:
|
||||
before (List[str]): Original sequence
|
||||
after (List[str]): Modified sequence
|
||||
include_equal (bool): Include unchanged parts
|
||||
include_removed (bool): Include removed parts
|
||||
include_added (bool): Include added parts
|
||||
include_replaced (bool): Include replaced parts
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
Yields:
|
||||
List[str]: Differences between sequences
|
||||
"""
|
||||
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)
|
||||
|
||||
|
||||
|
||||
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
|
||||
if include_equal and tag == 'equal':
|
||||
yield before[alo:ahi]
|
||||
elif include_removed and tag == 'delete':
|
||||
if include_change_type_prefix:
|
||||
yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)]
|
||||
else:
|
||||
yield same_slicer(before, alo, ahi)
|
||||
elif include_replaced and tag == 'replace':
|
||||
if include_change_type_prefix:
|
||||
yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] + \
|
||||
[f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
|
||||
else:
|
||||
yield same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
|
||||
elif include_added and tag == 'insert':
|
||||
if include_change_type_prefix:
|
||||
yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
|
||||
else:
|
||||
yield same_slicer(after, blo, bhi)
|
||||
|
||||
|
||||
def render_diff(
|
||||
previous_version_file_contents: str,
|
||||
newest_version_file_contents: str,
|
||||
include_equal: bool = False,
|
||||
include_removed: bool = True,
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
line_feed_sep: str = "\n",
|
||||
include_change_type_prefix: bool = True,
|
||||
patch_format: bool = False
|
||||
) -> str:
|
||||
"""
|
||||
Render the difference between two file contents.
|
||||
|
||||
Args:
|
||||
previous_version_file_contents (str): Original file contents
|
||||
newest_version_file_contents (str): Modified file contents
|
||||
include_equal (bool): Include unchanged parts
|
||||
include_removed (bool): Include removed parts
|
||||
include_added (bool): Include added parts
|
||||
include_replaced (bool): Include replaced parts
|
||||
line_feed_sep (str): Separator for lines in output
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
patch_format (bool): Use patch format for output
|
||||
Returns:
|
||||
str: Rendered difference
|
||||
"""
|
||||
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
|
||||
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
|
||||
|
||||
if patch_format:
|
||||
patch = difflib.unified_diff(previous_lines, newest_lines)
|
||||
return line_feed_sep.join(patch)
|
||||
|
||||
rendered_diff = customSequenceMatcher(
|
||||
before=previous_lines,
|
||||
after=newest_lines,
|
||||
include_equal=include_equal,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
include_change_type_prefix=include_change_type_prefix
|
||||
)
|
||||
|
||||
def flatten(lst: List[Union[str, List[str]]]) -> str:
|
||||
return line_feed_sep.join(flatten(x) if isinstance(x, list) else x for x in lst)
|
||||
|
||||
return flatten(rendered_diff)
|
||||
479
changedetectionio/diff/__init__.py
Normal file
479
changedetectionio/diff/__init__.py
Normal file
@@ -0,0 +1,479 @@
|
||||
"""
|
||||
Diff rendering module for change detection.
|
||||
|
||||
This module provides functions for rendering differences between text content,
|
||||
with support for various output formats and tokenization strategies.
|
||||
"""
|
||||
|
||||
import difflib
|
||||
from typing import List, Iterator, Union
|
||||
from loguru import logger
|
||||
import diff_match_patch as dmp_module
|
||||
import re
|
||||
import time
|
||||
|
||||
from .tokenizers import TOKENIZERS, tokenize_words_and_html
|
||||
|
||||
# Remember! gmail, outlook etc dont support <style> must be inline.
|
||||
# Gmail: strips <ins> and <del> tags entirely.
|
||||
# This is for the WHOLE line background style
|
||||
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
|
||||
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
|
||||
HTML_REMOVED_STYLE = REMOVED_STYLE # Export alias for handler.py
|
||||
HTML_ADDED_STYLE = ADDED_STYLE # Export alias for handler.py
|
||||
|
||||
# Darker backgrounds for nested highlighting (changed parts within lines)
|
||||
REMOVED_INNER_STYLE = "background-color: #ff867a; color: #111;"
|
||||
ADDED_INNER_STYLE = "background-color: #b2e841; color: #444;"
|
||||
HTML_CHANGED_STYLE = REMOVED_STYLE
|
||||
HTML_CHANGED_INTO_STYLE = ADDED_STYLE
|
||||
|
||||
# Placemarker constants - these get replaced by apply_service_tweaks() in handler.py
|
||||
# Something that cant get escaped to HTML by accident
|
||||
REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN'
|
||||
REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED'
|
||||
|
||||
ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN'
|
||||
ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED'
|
||||
|
||||
CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN'
|
||||
CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED'
|
||||
|
||||
CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN'
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
|
||||
# Compiled regex patterns for performance
|
||||
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
|
||||
"""
|
||||
Render word-level differences between two lines inline using diff-match-patch library.
|
||||
|
||||
Args:
|
||||
before_line: Original line text
|
||||
after_line: Modified line text
|
||||
ignore_junk: Ignore whitespace-only changes
|
||||
markdown_style: Unused (kept for backwards compatibility)
|
||||
tokenizer: Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
|
||||
|
||||
Returns:
|
||||
tuple[str, bool]: (diff output with inline word-level highlighting, has_changes flag)
|
||||
"""
|
||||
# Normalize whitespace if ignore_junk is enabled
|
||||
if ignore_junk:
|
||||
# Normalize whitespace: replace multiple spaces/tabs with single space
|
||||
before_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', before_line)
|
||||
after_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', after_line)
|
||||
else:
|
||||
before_normalized = before_line
|
||||
after_normalized = after_line
|
||||
|
||||
# Use diff-match-patch with word-level tokenization
|
||||
# Strategy: Use linesToChars to treat words as atomic units
|
||||
dmp = dmp_module.diff_match_patch()
|
||||
|
||||
# Get the tokenizer function from the registry
|
||||
tokenizer_func = TOKENIZERS.get(tokenizer, tokenize_words_and_html)
|
||||
|
||||
# Tokenize both lines using the selected tokenizer
|
||||
before_tokens = tokenizer_func(before_normalized)
|
||||
after_tokens = tokenizer_func(after_normalized or ' ')
|
||||
|
||||
# Create mappings for linesToChars (using it for word-mode)
|
||||
# Join tokens with newline so each "line" is a token
|
||||
before_text = '\n'.join(before_tokens)
|
||||
after_text = '\n'.join(after_tokens)
|
||||
|
||||
# Use linesToChars for word-mode diffing
|
||||
lines_result = dmp.diff_linesToChars(before_text, after_text)
|
||||
line_before, line_after, line_array = lines_result
|
||||
|
||||
# Perform diff on the encoded strings
|
||||
diffs = dmp.diff_main(line_before, line_after, False)
|
||||
|
||||
# Convert back to original text
|
||||
dmp.diff_charsToLines(diffs, line_array)
|
||||
|
||||
# Remove the newlines we added for tokenization
|
||||
diffs = [(op, text.replace('\n', '')) for op, text in diffs]
|
||||
|
||||
# DON'T apply semantic cleanup here - it would break token boundaries
|
||||
# (e.g., "63" -> "66" would become "6" + "3" vs "6" + "6")
|
||||
# We want to preserve the tokenizer's word boundaries
|
||||
|
||||
# Check if there are any changes
|
||||
has_changes = any(op != 0 for op, _ in diffs)
|
||||
|
||||
if ignore_junk and not has_changes:
|
||||
return after_line, False
|
||||
|
||||
# Check if the whole line is replaced (no unchanged content)
|
||||
whole_line_replaced = not any(op == 0 and text.strip() for op, text in diffs)
|
||||
|
||||
# Build the output using placemarkers
|
||||
# When whole line is replaced, wrap entire removed content once and entire added content once
|
||||
if whole_line_replaced:
|
||||
removed_tokens = []
|
||||
added_tokens = []
|
||||
|
||||
for op, text in diffs:
|
||||
if op == 0: # Equal (e.g., whitespace tokens in common positions)
|
||||
# Include in both removed and added to preserve spacing
|
||||
removed_tokens.append(text)
|
||||
added_tokens.append(text)
|
||||
elif op == -1: # Deletion
|
||||
removed_tokens.append(text)
|
||||
elif op == 1: # Insertion
|
||||
added_tokens.append(text)
|
||||
|
||||
# Join all tokens and wrap the entire string once for removed, once for added
|
||||
result_parts = []
|
||||
|
||||
if removed_tokens:
|
||||
removed_full = ''.join(removed_tokens).rstrip()
|
||||
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
|
||||
if added_tokens:
|
||||
if result_parts: # Add newline between removed and added
|
||||
result_parts.append('\n')
|
||||
added_full = ''.join(added_tokens).rstrip()
|
||||
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
else:
|
||||
# Inline changes within the line
|
||||
result_parts = []
|
||||
for op, text in diffs:
|
||||
if op == 0: # Equal
|
||||
result_parts.append(text)
|
||||
elif op == 1: # Insertion
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
elif op == -1: # Deletion
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
|
||||
|
||||
def render_nested_line_diff(before_line: str, after_line: str, ignore_junk: bool = False, tokenizer: str = 'words_and_html') -> tuple[str, str, bool]:
|
||||
"""
|
||||
Render line-level differences with nested highlighting for changed parts.
|
||||
|
||||
Returns two separate lines:
|
||||
- Before line: light red background with dark red on removed parts
|
||||
- After line: light green background with dark green on added parts
|
||||
|
||||
Args:
|
||||
before_line: Original line text
|
||||
after_line: Modified line text
|
||||
ignore_junk: Ignore whitespace-only changes
|
||||
tokenizer: Name of tokenizer to use from TOKENIZERS registry
|
||||
|
||||
Returns:
|
||||
tuple[str, str, bool]: (before_with_highlights, after_with_highlights, has_changes)
|
||||
"""
|
||||
# Normalize whitespace if ignore_junk is enabled
|
||||
if ignore_junk:
|
||||
before_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', before_line)
|
||||
after_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', after_line)
|
||||
else:
|
||||
before_normalized = before_line
|
||||
after_normalized = after_line
|
||||
|
||||
# Use diff-match-patch with word-level tokenization
|
||||
dmp = dmp_module.diff_match_patch()
|
||||
|
||||
# Get the tokenizer function from the registry
|
||||
tokenizer_func = TOKENIZERS.get(tokenizer, tokenize_words_and_html)
|
||||
|
||||
# Tokenize both lines
|
||||
before_tokens = tokenizer_func(before_normalized)
|
||||
after_tokens = tokenizer_func(after_normalized or ' ')
|
||||
|
||||
# Create mappings for linesToChars
|
||||
before_text = '\n'.join(before_tokens)
|
||||
after_text = '\n'.join(after_tokens)
|
||||
|
||||
# Use linesToChars for word-mode diffing
|
||||
lines_result = dmp.diff_linesToChars(before_text, after_text)
|
||||
line_before, line_after, line_array = lines_result
|
||||
|
||||
# Perform diff on the encoded strings
|
||||
diffs = dmp.diff_main(line_before, line_after, False)
|
||||
|
||||
# Convert back to original text
|
||||
dmp.diff_charsToLines(diffs, line_array)
|
||||
|
||||
# Remove the newlines we added for tokenization
|
||||
diffs = [(op, text.replace('\n', '')) for op, text in diffs]
|
||||
|
||||
# DON'T apply semantic cleanup here - it would break token boundaries
|
||||
# (e.g., "63" -> "66" would become "6" + "3" vs "6" + "6")
|
||||
# We want to preserve the tokenizer's word boundaries
|
||||
|
||||
# Check if there are any changes
|
||||
has_changes = any(op != 0 for op, _ in diffs)
|
||||
|
||||
if ignore_junk and not has_changes:
|
||||
return before_line, after_line, False
|
||||
|
||||
# Build the before line (with nested highlighting for removed parts)
|
||||
before_parts = []
|
||||
for op, text in diffs:
|
||||
if op == 0: # Equal
|
||||
before_parts.append(text)
|
||||
elif op == -1: # Deletion (in before)
|
||||
before_parts.append(f'<span style="{REMOVED_INNER_STYLE}">{text}</span>')
|
||||
# Skip insertions (op == 1) for the before line
|
||||
|
||||
before_content = ''.join(before_parts)
|
||||
|
||||
# Build the after line (with nested highlighting for added parts)
|
||||
after_parts = []
|
||||
for op, text in diffs:
|
||||
if op == 0: # Equal
|
||||
after_parts.append(text)
|
||||
elif op == 1: # Insertion (in after)
|
||||
after_parts.append(f'<span style="{ADDED_INNER_STYLE}">{text}</span>')
|
||||
# Skip deletions (op == -1) for the after line
|
||||
|
||||
after_content = ''.join(after_parts)
|
||||
|
||||
# Wrap content with placemarkers (inner HTML highlighting is preserved)
|
||||
before_html = f'{CHANGED_PLACEMARKER_OPEN}{before_content}{CHANGED_PLACEMARKER_CLOSED}'
|
||||
after_html = f'{CHANGED_INTO_PLACEMARKER_OPEN}{after_content}{CHANGED_INTO_PLACEMARKER_CLOSED}'
|
||||
|
||||
return before_html, after_html, has_changes
|
||||
|
||||
|
||||
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
|
||||
"""Return a slice of the list, or a single element if start == end."""
|
||||
return lst[start:end] if start != end else [lst[start]]
|
||||
|
||||
def customSequenceMatcher(
|
||||
before: List[str],
|
||||
after: List[str],
|
||||
include_equal: bool = False,
|
||||
include_removed: bool = True,
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
include_change_type_prefix: bool = True,
|
||||
word_diff: bool = False,
|
||||
context_lines: int = 0,
|
||||
case_insensitive: bool = False,
|
||||
ignore_junk: bool = False,
|
||||
tokenizer: str = 'words_and_html'
|
||||
) -> Iterator[List[str]]:
|
||||
"""
|
||||
Compare two sequences and yield differences based on specified parameters.
|
||||
|
||||
Args:
|
||||
before (List[str]): Original sequence
|
||||
after (List[str]): Modified sequence
|
||||
include_equal (bool): Include unchanged parts
|
||||
include_removed (bool): Include removed parts
|
||||
include_added (bool): Include added parts
|
||||
include_replaced (bool): Include replaced parts
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
word_diff (bool): Use word-level diffing for replaced lines (controls inline rendering)
|
||||
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
|
||||
case_insensitive (bool): Perform case-insensitive comparison
|
||||
ignore_junk (bool): Ignore whitespace-only changes
|
||||
tokenizer (str): Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
|
||||
|
||||
Yields:
|
||||
List[str]: Differences between sequences
|
||||
"""
|
||||
# Prepare sequences for comparison (lowercase if case-insensitive, normalize whitespace if ignore_junk)
|
||||
def prepare_line(line):
|
||||
if case_insensitive:
|
||||
line = line.lower()
|
||||
if ignore_junk:
|
||||
# Normalize whitespace: replace multiple spaces/tabs with single space
|
||||
line = WHITESPACE_NORMALIZE_RE.sub(' ', line)
|
||||
return line
|
||||
|
||||
compare_before = [prepare_line(line) for line in before]
|
||||
compare_after = [prepare_line(line) for line in after]
|
||||
|
||||
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=compare_before, b=compare_after)
|
||||
|
||||
# When context_lines is set and include_equal is False, we need to track which equal lines to include
|
||||
if context_lines > 0 and not include_equal:
|
||||
opcodes = list(cruncher.get_opcodes())
|
||||
# Mark equal ranges that should be included based on context
|
||||
included_equal_ranges = set()
|
||||
|
||||
for i, (tag, alo, ahi, blo, bhi) in enumerate(opcodes):
|
||||
if tag != 'equal':
|
||||
# Include context lines before this change
|
||||
for j in range(max(0, i - 1), i):
|
||||
if opcodes[j][0] == 'equal':
|
||||
prev_alo, prev_ahi = opcodes[j][1], opcodes[j][2]
|
||||
# Include last N lines of the previous equal block
|
||||
context_start = max(prev_alo, prev_ahi - context_lines)
|
||||
for line_num in range(context_start, prev_ahi):
|
||||
included_equal_ranges.add(line_num)
|
||||
|
||||
# Include context lines after this change
|
||||
for j in range(i + 1, min(len(opcodes), i + 2)):
|
||||
if opcodes[j][0] == 'equal':
|
||||
next_alo, next_ahi = opcodes[j][1], opcodes[j][2]
|
||||
# Include first N lines of the next equal block
|
||||
context_end = min(next_ahi, next_alo + context_lines)
|
||||
for line_num in range(next_alo, context_end):
|
||||
included_equal_ranges.add(line_num)
|
||||
|
||||
# Remember! gmail, outlook etc dont support <style> must be inline.
|
||||
# Gmail: strips <ins> and <del> tags entirely.
|
||||
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
|
||||
if tag == 'equal':
|
||||
if include_equal:
|
||||
yield before[alo:ahi]
|
||||
elif context_lines > 0:
|
||||
# Only include equal lines that are in the context range
|
||||
context_lines_to_include = [before[i] for i in range(alo, ahi) if i in included_equal_ranges]
|
||||
if context_lines_to_include:
|
||||
yield context_lines_to_include
|
||||
elif include_removed and tag == 'delete':
|
||||
if include_change_type_prefix:
|
||||
yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)]
|
||||
else:
|
||||
yield same_slicer(before, alo, ahi)
|
||||
elif include_replaced and tag == 'replace':
|
||||
before_lines = same_slicer(before, alo, ahi)
|
||||
after_lines = same_slicer(after, blo, bhi)
|
||||
|
||||
# Use inline word-level diff for single line replacements when word_diff is enabled
|
||||
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
|
||||
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
|
||||
if ignore_junk and not has_changes:
|
||||
# No real changes, skip this line
|
||||
continue
|
||||
yield [inline_diff]
|
||||
else:
|
||||
# Fall back to line-level diff for multi-line changes
|
||||
if include_change_type_prefix:
|
||||
yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in before_lines] + \
|
||||
[f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in after_lines]
|
||||
else:
|
||||
yield before_lines + after_lines
|
||||
elif include_added and tag == 'insert':
|
||||
if include_change_type_prefix:
|
||||
yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
|
||||
else:
|
||||
yield same_slicer(after, blo, bhi)
|
||||
|
||||
def render_diff(
|
||||
previous_version_file_contents: str,
|
||||
newest_version_file_contents: str,
|
||||
include_equal: bool = False,
|
||||
include_removed: bool = True,
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
include_change_type_prefix: bool = True,
|
||||
patch_format: bool = False,
|
||||
word_diff: bool = True,
|
||||
context_lines: int = 0,
|
||||
case_insensitive: bool = False,
|
||||
ignore_junk: bool = False,
|
||||
tokenizer: str = 'words_and_html'
|
||||
) -> str:
|
||||
"""
|
||||
Render the difference between two file contents.
|
||||
|
||||
Args:
|
||||
previous_version_file_contents (str): Original file contents
|
||||
newest_version_file_contents (str): Modified file contents
|
||||
include_equal (bool): Include unchanged parts
|
||||
include_removed (bool): Include removed parts
|
||||
include_added (bool): Include added parts
|
||||
include_replaced (bool): Include replaced parts
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
patch_format (bool): Use patch format for output
|
||||
word_diff (bool): Use word-level diffing for replaced lines (controls inline rendering)
|
||||
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
|
||||
case_insensitive (bool): Perform case-insensitive comparison, By default the test_json_diff/process.py is case sensitive, so this follows same logic
|
||||
ignore_junk (bool): Ignore whitespace-only changes
|
||||
tokenizer (str): Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
|
||||
|
||||
Returns:
|
||||
str: Rendered difference
|
||||
"""
|
||||
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
|
||||
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
|
||||
now = time.time()
|
||||
logger.debug(
|
||||
f"diff options: "
|
||||
f"include_equal={include_equal}, "
|
||||
f"include_removed={include_removed}, "
|
||||
f"include_added={include_added}, "
|
||||
f"include_replaced={include_replaced}, "
|
||||
f"include_change_type_prefix={include_change_type_prefix}, "
|
||||
f"patch_format={patch_format}, "
|
||||
f"word_diff={word_diff}, "
|
||||
f"context_lines={context_lines}, "
|
||||
f"case_insensitive={case_insensitive}, "
|
||||
f"ignore_junk={ignore_junk}, "
|
||||
f"tokenizer={tokenizer}"
|
||||
)
|
||||
if patch_format:
|
||||
patch = difflib.unified_diff(previous_lines, newest_lines)
|
||||
return "\n".join(patch)
|
||||
|
||||
rendered_diff = customSequenceMatcher(
|
||||
before=previous_lines,
|
||||
after=newest_lines,
|
||||
include_equal=include_equal,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
include_change_type_prefix=include_change_type_prefix,
|
||||
word_diff=word_diff,
|
||||
context_lines=context_lines,
|
||||
case_insensitive=case_insensitive,
|
||||
ignore_junk=ignore_junk,
|
||||
tokenizer=tokenizer
|
||||
)
|
||||
|
||||
def flatten(lst: List[Union[str, List[str]]]) -> str:
|
||||
result = []
|
||||
for x in lst:
|
||||
if isinstance(x, list):
|
||||
result.extend(x)
|
||||
else:
|
||||
result.append(x)
|
||||
return "\n".join(result)
|
||||
|
||||
logger.debug(f"Diff generated in {time.time() - now:.2f}s")
|
||||
|
||||
return flatten(rendered_diff)
|
||||
|
||||
|
||||
# Export main public API
|
||||
__all__ = [
|
||||
'render_diff',
|
||||
'customSequenceMatcher',
|
||||
'render_inline_word_diff',
|
||||
'render_nested_line_diff',
|
||||
'TOKENIZERS',
|
||||
'REMOVED_STYLE',
|
||||
'ADDED_STYLE',
|
||||
'REMOVED_INNER_STYLE',
|
||||
'ADDED_INNER_STYLE',
|
||||
]
|
||||
23
changedetectionio/diff/tokenizers/__init__.py
Normal file
23
changedetectionio/diff/tokenizers/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
Tokenizers for diff operations.
|
||||
|
||||
This module provides various tokenization strategies for use with the diff system.
|
||||
New tokenizers can be easily added by:
|
||||
1. Creating a new module in this directory
|
||||
2. Importing and registering it in the TOKENIZERS dictionary below
|
||||
"""
|
||||
|
||||
from .natural_text import tokenize_words
|
||||
from .words_and_html import tokenize_words_and_html
|
||||
|
||||
# Tokenizer registry - maps tokenizer names to functions
|
||||
TOKENIZERS = {
|
||||
'words': tokenize_words,
|
||||
'words_and_html': tokenize_words_and_html,
|
||||
}
|
||||
|
||||
__all__ = [
|
||||
'tokenize_words',
|
||||
'tokenize_words_and_html',
|
||||
'TOKENIZERS',
|
||||
]
|
||||
44
changedetectionio/diff/tokenizers/natural_text.py
Normal file
44
changedetectionio/diff/tokenizers/natural_text.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
Simple word tokenizer using whitespace boundaries.
|
||||
|
||||
This is a simpler tokenizer that treats all whitespace as token boundaries
|
||||
without special handling for HTML tags or other markup.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def tokenize_words(text: str) -> List[str]:
|
||||
"""
|
||||
Split text into words using simple whitespace boundaries.
|
||||
|
||||
This is a simpler tokenizer that treats all whitespace as token boundaries
|
||||
without special handling for HTML tags.
|
||||
|
||||
Args:
|
||||
text: Input text to tokenize
|
||||
|
||||
Returns:
|
||||
List of tokens (words and whitespace)
|
||||
|
||||
Examples:
|
||||
>>> tokenize_words("Hello world")
|
||||
['Hello', ' ', 'world']
|
||||
>>> tokenize_words("one two")
|
||||
['one', ' ', ' ', 'two']
|
||||
"""
|
||||
tokens = []
|
||||
current = ''
|
||||
|
||||
for char in text:
|
||||
if char.isspace():
|
||||
if current:
|
||||
tokens.append(current)
|
||||
current = ''
|
||||
tokens.append(char)
|
||||
else:
|
||||
current += char
|
||||
|
||||
if current:
|
||||
tokens.append(current)
|
||||
return tokens
|
||||
61
changedetectionio/diff/tokenizers/words_and_html.py
Normal file
61
changedetectionio/diff/tokenizers/words_and_html.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""
|
||||
Tokenizer that preserves HTML tags as atomic units while splitting on whitespace.
|
||||
|
||||
This tokenizer is specifically designed for HTML content where:
|
||||
- HTML tags should remain intact (e.g., '<p>', '<a href="...">')
|
||||
- Whitespace tokens are preserved for accurate diff reconstruction
|
||||
- Words are split on whitespace boundaries
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def tokenize_words_and_html(text: str) -> List[str]:
|
||||
"""
|
||||
Split text into words and boundaries (spaces, HTML tags).
|
||||
|
||||
This tokenizer preserves HTML tags as atomic units while splitting on whitespace.
|
||||
Useful for content that contains HTML markup.
|
||||
|
||||
Args:
|
||||
text: Input text to tokenize
|
||||
|
||||
Returns:
|
||||
List of tokens (words, spaces, HTML tags)
|
||||
|
||||
Examples:
|
||||
>>> tokenize_words_and_html("<p>Hello world</p>")
|
||||
['<p>', 'Hello', ' ', 'world', '</p>']
|
||||
>>> tokenize_words_and_html("<a href='test.com'>link</a>")
|
||||
['<a href=\\'test.com\\'>', 'link', '</a>']
|
||||
"""
|
||||
tokens = []
|
||||
current = ''
|
||||
in_tag = False
|
||||
|
||||
for char in text:
|
||||
if char == '<':
|
||||
# Start of HTML tag
|
||||
if current:
|
||||
tokens.append(current)
|
||||
current = ''
|
||||
current = '<'
|
||||
in_tag = True
|
||||
elif char == '>' and in_tag:
|
||||
# End of HTML tag
|
||||
current += '>'
|
||||
tokens.append(current)
|
||||
current = ''
|
||||
in_tag = False
|
||||
elif char.isspace() and not in_tag:
|
||||
# Space outside of tag
|
||||
if current:
|
||||
tokens.append(current)
|
||||
current = ''
|
||||
tokens.append(char)
|
||||
else:
|
||||
current += char
|
||||
|
||||
if current:
|
||||
tokens.append(current)
|
||||
return tokens
|
||||
@@ -23,6 +23,7 @@ from flask import (
|
||||
render_template,
|
||||
request,
|
||||
send_from_directory,
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
@@ -34,13 +35,15 @@ from flask_cors import CORS
|
||||
# Make this a global singleton to avoid multiple signal objects
|
||||
watch_check_update = signal('watch_check_update', doc='Signal sent when a watch check is completed')
|
||||
from flask_wtf import CSRFProtect
|
||||
from flask_babel import Babel, gettext, get_locale
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio import __version__
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -66,6 +69,10 @@ CORS(app)
|
||||
|
||||
# Super handy for compressing large BrowserSteps responses and others
|
||||
FlaskCompress(app)
|
||||
app.config['COMPRESS_MIN_SIZE'] = 4096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||
|
||||
|
||||
# Stop browser caching of assets
|
||||
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
|
||||
@@ -78,9 +85,29 @@ if os.getenv('FLASK_SERVER_NAME'):
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
# Disables caching of the templates
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = True
|
||||
|
||||
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
|
||||
|
||||
# Configure Jinja2 to search for templates in plugin directories
|
||||
def _configure_plugin_templates():
|
||||
"""Configure Jinja2 loader to include plugin template directories."""
|
||||
from jinja2 import ChoiceLoader, FileSystemLoader
|
||||
from changedetectionio.pluggy_interface import get_plugin_template_paths
|
||||
|
||||
# Get plugin template paths
|
||||
plugin_template_paths = get_plugin_template_paths()
|
||||
|
||||
if plugin_template_paths:
|
||||
# Create a ChoiceLoader that searches app templates first, then plugin templates
|
||||
loaders = [app.jinja_loader] # Keep the default app loader first
|
||||
for path in plugin_template_paths:
|
||||
loaders.append(FileSystemLoader(path))
|
||||
|
||||
app.jinja_loader = ChoiceLoader(loaders)
|
||||
logger.info(f"Configured Jinja2 to search {len(plugin_template_paths)} plugin template directories")
|
||||
|
||||
# Configure plugin templates (called after plugins are loaded)
|
||||
_configure_plugin_templates()
|
||||
csrf = CSRFProtect()
|
||||
csrf.init_app(app)
|
||||
notification_debug_log=[]
|
||||
@@ -101,12 +128,12 @@ def init_app_secret(datastore_path):
|
||||
path = os.path.join(datastore_path, "secret.txt")
|
||||
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
with open(path, "r", encoding='utf-8') as f:
|
||||
secret = f.read()
|
||||
|
||||
except FileNotFoundError:
|
||||
import secrets
|
||||
with open(path, "w") as f:
|
||||
with open(path, "w", encoding='utf-8') as f:
|
||||
secret = secrets.token_hex(32)
|
||||
f.write(secret)
|
||||
|
||||
@@ -133,6 +160,11 @@ def get_socketio_path():
|
||||
# Socket.IO will be available at {prefix}/socket.io/
|
||||
return prefix
|
||||
|
||||
@app.template_global('is_safe_valid_url')
|
||||
def _is_safe_valid_url(test_url):
|
||||
from .validate_url import is_safe_valid_url
|
||||
return is_safe_valid_url(test_url)
|
||||
|
||||
|
||||
@app.template_filter('format_number_locale')
|
||||
def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
@@ -178,16 +210,26 @@ def _get_worker_status_info():
|
||||
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
|
||||
if watch_obj['last_checked'] == 0:
|
||||
return 'Not yet'
|
||||
return gettext('Not yet')
|
||||
|
||||
return timeago.format(int(watch_obj['last_checked']), time.time())
|
||||
locale = get_timeago_locale(str(get_locale()))
|
||||
try:
|
||||
return timeago.format(int(watch_obj['last_checked']), time.time(), locale)
|
||||
except:
|
||||
# Fallback to English if locale not supported by timeago
|
||||
return timeago.format(int(watch_obj['last_checked']), time.time(), 'en')
|
||||
|
||||
@app.template_filter('format_timestamp_timeago')
|
||||
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
|
||||
if not timestamp:
|
||||
return 'Not yet'
|
||||
return gettext('Not yet')
|
||||
|
||||
return timeago.format(int(timestamp), time.time())
|
||||
locale = get_timeago_locale(str(get_locale()))
|
||||
try:
|
||||
return timeago.format(int(timestamp), time.time(), locale)
|
||||
except:
|
||||
# Fallback to English if locale not supported by timeago
|
||||
return timeago.format(int(timestamp), time.time(), 'en')
|
||||
|
||||
|
||||
@app.template_filter('pagination_slice')
|
||||
@@ -201,10 +243,59 @@ def _jinja2_filter_pagination_slice(arr, skip):
|
||||
@app.template_filter('format_seconds_ago')
|
||||
def _jinja2_filter_seconds_precise(timestamp):
|
||||
if timestamp == False:
|
||||
return 'Not yet'
|
||||
return gettext('Not yet')
|
||||
|
||||
return format(int(time.time()-timestamp), ',d')
|
||||
|
||||
@app.template_filter('fetcher_status_icons')
|
||||
def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
"""Get status icon HTML for a given fetcher.
|
||||
|
||||
This filter checks both built-in fetchers and plugin fetchers for status icons.
|
||||
|
||||
Args:
|
||||
fetcher_name: The fetcher name (e.g., 'html_webdriver', 'html_js_zyte')
|
||||
|
||||
Returns:
|
||||
str: HTML string containing status icon elements
|
||||
"""
|
||||
from changedetectionio import content_fetchers
|
||||
from changedetectionio.pluggy_interface import collect_fetcher_status_icons
|
||||
from markupsafe import Markup
|
||||
from flask import url_for
|
||||
|
||||
icon_data = None
|
||||
|
||||
# First check if it's a plugin fetcher (plugins have priority)
|
||||
plugin_icon_data = collect_fetcher_status_icons(fetcher_name)
|
||||
if plugin_icon_data:
|
||||
icon_data = plugin_icon_data
|
||||
# Check if it's a built-in fetcher
|
||||
elif hasattr(content_fetchers, fetcher_name):
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name)
|
||||
if hasattr(fetcher_class, 'get_status_icon_data'):
|
||||
icon_data = fetcher_class.get_status_icon_data()
|
||||
|
||||
# Build HTML from icon data
|
||||
if icon_data and isinstance(icon_data, dict):
|
||||
# Use 'group' from icon_data if specified, otherwise default to 'images'
|
||||
group = icon_data.get('group', 'images')
|
||||
|
||||
# Try to use url_for, but fall back to manual URL building if endpoint not registered yet
|
||||
try:
|
||||
icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
|
||||
except:
|
||||
# Fallback: build URL manually respecting APPLICATION_ROOT
|
||||
from flask import request
|
||||
app_root = request.script_root if hasattr(request, 'script_root') else ''
|
||||
icon_url = f"{app_root}/static/{group}/{icon_data['filename']}"
|
||||
|
||||
style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
|
||||
html = f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>'
|
||||
return Markup(html)
|
||||
|
||||
return ''
|
||||
|
||||
# Import login_optionally_required from auth_decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -269,7 +360,33 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
login_manager = flask_login.LoginManager(app)
|
||||
login_manager.login_view = 'login'
|
||||
app.secret_key = init_app_secret(config['datastore_path'])
|
||||
|
||||
|
||||
# Initialize Flask-Babel for i18n support
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
def get_locale():
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
locale = session['locale']
|
||||
print(f"DEBUG: get_locale() returning from session: {locale}")
|
||||
return locale
|
||||
# 2. Fall back to Accept-Language header
|
||||
locale = request.accept_languages.best_match(language_codes)
|
||||
print(f"DEBUG: get_locale() returning from Accept-Language: {locale}")
|
||||
return locale
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
|
||||
# Make i18n functions available to templates
|
||||
app.jinja_env.globals.update(
|
||||
_=gettext,
|
||||
get_locale=get_locale,
|
||||
get_flag_for_locale=get_flag_for_locale,
|
||||
available_languages=available_languages
|
||||
)
|
||||
|
||||
# Set up a request hook to check authentication for all routes
|
||||
@app.before_request
|
||||
def check_authentication():
|
||||
@@ -302,6 +419,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
return login_manager.unauthorized()
|
||||
|
||||
|
||||
watch_api.add_resource(WatchHistoryDiff,
|
||||
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchSingleHistory,
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
@@ -353,6 +473,18 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
flask_login.logout_user()
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@app.route('/set-language/<locale>')
|
||||
def set_language(locale):
|
||||
"""Set the user's preferred language in the session"""
|
||||
# Validate the locale against available languages
|
||||
if locale in language_codes:
|
||||
session['locale'] = locale
|
||||
else:
|
||||
logger.error(f"Invalid locale {locale}, available: {language_codes}")
|
||||
|
||||
# Redirect back to the page they came from, or home
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39
|
||||
# You can divide up the stuff like this
|
||||
@app.route('/login', methods=['GET', 'POST'])
|
||||
@@ -382,7 +514,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# We would sometimes get login loop errors on sites hosted in sub-paths
|
||||
|
||||
# note for the future:
|
||||
# if not is_safe_url(next):
|
||||
# if not is_safe_valid_url(next):
|
||||
# return flask.abort(400)
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@@ -483,6 +615,31 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
# Handle plugin group specially
|
||||
if group == 'plugin':
|
||||
# Serve files from plugin static directories
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
import os as os_check
|
||||
|
||||
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
|
||||
if hasattr(plugin_obj, 'plugin_static_path'):
|
||||
try:
|
||||
static_path = plugin_obj.plugin_static_path()
|
||||
if static_path and os_check.path.isdir(static_path):
|
||||
# Check if file exists in plugin's static directory
|
||||
plugin_file_path = os_check.path.join(static_path, filename)
|
||||
if os_check.path.isfile(plugin_file_path):
|
||||
# Found the file in a plugin
|
||||
response = make_response(send_from_directory(static_path, filename))
|
||||
response.headers['Cache-Control'] = 'max-age=3600, public' # Cache for 1 hour
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.debug(f"Error checking plugin {plugin_name} for static file: {e}")
|
||||
pass
|
||||
|
||||
# File not found in any plugin
|
||||
abort(404)
|
||||
|
||||
# These files should be in our subdirectory
|
||||
try:
|
||||
return send_from_directory(f"static/{group}", path=filename)
|
||||
@@ -789,15 +946,19 @@ def ticker_thread_check_time_launch_checks():
|
||||
|
||||
# @todo - Maybe make this a hook?
|
||||
# Time schedule limit - Decide between watch or global settings
|
||||
scheduler_source = None
|
||||
if watch.get('time_between_check_use_default'):
|
||||
time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {})
|
||||
logger.trace(f"{uuid} Time scheduler - Using system/global settings")
|
||||
scheduler_source = 'system/global settings'
|
||||
|
||||
else:
|
||||
time_schedule_limit = watch.get('time_schedule_limit')
|
||||
logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
|
||||
scheduler_source = 'watch'
|
||||
|
||||
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
|
||||
|
||||
if time_schedule_limit and time_schedule_limit.get('enabled'):
|
||||
logger.trace(f"{uuid} Time scheduler - Using scheduler settings from {scheduler_source}")
|
||||
try:
|
||||
result = is_within_schedule(time_schedule_limit=time_schedule_limit,
|
||||
default_tz=tz_name
|
||||
@@ -809,6 +970,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
logger.error(
|
||||
f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}")
|
||||
return False
|
||||
|
||||
# If they supplied an individual entry minutes to threshold.
|
||||
threshold = recheck_time_system_seconds if watch.get('time_between_check_use_default') else watch.threshold_seconds()
|
||||
|
||||
|
||||
@@ -2,16 +2,21 @@ import os
|
||||
import re
|
||||
from loguru import logger
|
||||
from wtforms.widgets.core import TimeInput
|
||||
from flask_babel import lazy_gettext as _l, gettext
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
|
||||
from changedetectionio.conditions.form import ConditionFormRow
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS, \
|
||||
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio import processors
|
||||
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
Form,
|
||||
Field,
|
||||
FloatField,
|
||||
IntegerField,
|
||||
RadioField,
|
||||
SelectField,
|
||||
@@ -28,11 +33,8 @@ from wtforms.utils import unset_value
|
||||
|
||||
from wtforms.validators import ValidationError
|
||||
|
||||
from validators.url import url as url_validator
|
||||
|
||||
from changedetectionio.widgets import TernaryNoneBooleanField
|
||||
|
||||
|
||||
# default
|
||||
# each select <option data-enabled="enabled-0-0"
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||
@@ -58,8 +60,8 @@ valid_method = {
|
||||
|
||||
default_method = 'GET'
|
||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.'
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT=_l('At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.')
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT=_l('At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.')
|
||||
|
||||
class StringListField(StringField):
|
||||
widget = widgets.TextArea()
|
||||
@@ -159,7 +161,7 @@ class TimeStringField(Field):
|
||||
time_str = valuelist[0]
|
||||
# Simple validation for HH:MM format
|
||||
if not time_str or len(time_str.split(":")) != 2:
|
||||
raise ValidationError("Invalid time format. Use HH:MM.")
|
||||
raise ValidationError(_l("Invalid time format. Use HH:MM."))
|
||||
self.data = time_str
|
||||
|
||||
|
||||
@@ -175,15 +177,15 @@ class validateTimeZoneName(object):
|
||||
from zoneinfo import available_timezones
|
||||
python_timezones = available_timezones()
|
||||
if field.data and field.data not in python_timezones:
|
||||
raise ValidationError("Not a valid timezone name")
|
||||
raise ValidationError(_l("Not a valid timezone name"))
|
||||
|
||||
class ScheduleLimitDaySubForm(Form):
|
||||
enabled = BooleanField("not set", default=True)
|
||||
start_time = TimeStringField("Start At", default="00:00", validators=[validators.Optional()])
|
||||
duration = FormField(TimeDurationForm, label="Run duration")
|
||||
enabled = BooleanField(_l("not set"), default=True)
|
||||
start_time = TimeStringField(_l("Start At"), default="00:00", validators=[validators.Optional()])
|
||||
duration = FormField(TimeDurationForm, label=_l("Run duration"))
|
||||
|
||||
class ScheduleLimitForm(Form):
|
||||
enabled = BooleanField("Use time scheduler", default=False)
|
||||
enabled = BooleanField(_l("Use time scheduler"), default=False)
|
||||
# Because the label for=""" doesnt line up/work with the actual checkbox
|
||||
monday = FormField(ScheduleLimitDaySubForm, label="")
|
||||
tuesday = FormField(ScheduleLimitDaySubForm, label="")
|
||||
@@ -193,7 +195,7 @@ class ScheduleLimitForm(Form):
|
||||
saturday = FormField(ScheduleLimitDaySubForm, label="")
|
||||
sunday = FormField(ScheduleLimitDaySubForm, label="")
|
||||
|
||||
timezone = StringField("Optional timezone to run in",
|
||||
timezone = StringField(_l("Optional timezone to run in"),
|
||||
render_kw={"list": "timezones"},
|
||||
validators=[validateTimeZoneName()]
|
||||
)
|
||||
@@ -207,13 +209,13 @@ class ScheduleLimitForm(Form):
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
|
||||
self.monday.form.enabled.label.text="Monday"
|
||||
self.tuesday.form.enabled.label.text = "Tuesday"
|
||||
self.wednesday.form.enabled.label.text = "Wednesday"
|
||||
self.thursday.form.enabled.label.text = "Thursday"
|
||||
self.friday.form.enabled.label.text = "Friday"
|
||||
self.saturday.form.enabled.label.text = "Saturday"
|
||||
self.sunday.form.enabled.label.text = "Sunday"
|
||||
self.monday.form.enabled.label.text=_l("Monday")
|
||||
self.tuesday.form.enabled.label.text = _l("Tuesday")
|
||||
self.wednesday.form.enabled.label.text = _l("Wednesday")
|
||||
self.thursday.form.enabled.label.text = _l("Thursday")
|
||||
self.friday.form.enabled.label.text = _l("Friday")
|
||||
self.saturday.form.enabled.label.text = _l("Saturday")
|
||||
self.sunday.form.enabled.label.text = _l("Sunday")
|
||||
|
||||
|
||||
def validate_time_between_check_has_values(form):
|
||||
@@ -238,7 +240,7 @@ class RequiredTimeInterval(object):
|
||||
Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]).
|
||||
"""
|
||||
def __init__(self, message=None):
|
||||
self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
|
||||
self.message = message or _l('At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.')
|
||||
|
||||
def __call__(self, form, field):
|
||||
if not validate_time_between_check_has_values(field.form):
|
||||
@@ -246,11 +248,11 @@ class RequiredTimeInterval(object):
|
||||
|
||||
|
||||
class TimeBetweenCheckForm(Form):
|
||||
weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
hours = IntegerField('Hours', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
minutes = IntegerField('Minutes', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
weeks = IntegerField(_l('Weeks'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
days = IntegerField(_l('Days'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
hours = IntegerField(_l('Hours'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
minutes = IntegerField(_l('Minutes'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
seconds = IntegerField(_l('Seconds'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
# @todo add total seconds minimum validatior = minimum_seconds_recheck_time
|
||||
|
||||
def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
|
||||
@@ -506,7 +508,9 @@ class ValidateJinja2Template(object):
|
||||
jinja2_env = create_jinja_env(loader=BaseLoader)
|
||||
|
||||
# Add notification tokens for validation
|
||||
jinja2_env.globals.update(NotificationContextData())
|
||||
static_token_placeholders = NotificationContextData()
|
||||
static_token_placeholders.set_random_for_validation()
|
||||
jinja2_env.globals.update(static_token_placeholders)
|
||||
if hasattr(field, 'extra_notification_tokens'):
|
||||
jinja2_env.globals.update(field.extra_notification_tokens)
|
||||
|
||||
@@ -541,19 +545,10 @@ class validateURL(object):
|
||||
|
||||
|
||||
def validate_url(test_url):
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
try:
|
||||
url_validator(test_url, simple_host=allow_simplehost)
|
||||
except validators.ValidationError:
|
||||
#@todo check for xss
|
||||
message = f"'{test_url}' is not a valid URL."
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
if not is_safe_valid_url(test_url):
|
||||
# This should be wtforms.validators.
|
||||
raise ValidationError(message)
|
||||
|
||||
from .model.Watch import is_safe_url
|
||||
if not is_safe_url(test_url):
|
||||
# This should be wtforms.validators.
|
||||
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')
|
||||
raise ValidationError('Watch protocol is not permitted or invalid URL format')
|
||||
|
||||
|
||||
class ValidateSinglePythonRegexString(object):
|
||||
@@ -727,19 +722,16 @@ class ValidateStartsWithRegex(object):
|
||||
if not stripped:
|
||||
if self.allow_empty:
|
||||
continue
|
||||
raise ValidationError(self.message or "Empty value not allowed.")
|
||||
raise ValidationError(self.message or _l("Empty value not allowed."))
|
||||
if not self.pattern.match(stripped):
|
||||
raise ValidationError(self.message or "Invalid value.")
|
||||
raise ValidationError(self.message or _l("Invalid value."))
|
||||
|
||||
class quickWatchForm(Form):
|
||||
from . import processors
|
||||
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group tag', [validators.Optional()])
|
||||
watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
|
||||
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
# Common to a single watch and the global settings
|
||||
@@ -752,14 +744,14 @@ class commonSettingsForm(Form):
|
||||
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
|
||||
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
|
||||
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
|
||||
scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])
|
||||
fetch_backend = RadioField(_l('Fetch Method'), choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
notification_body = TextAreaField(_l('Notification Body'), default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
||||
|
||||
# Not true anymore but keep the validate_ hook for future use, we convert color tags
|
||||
# def validate_notification_urls(self, field):
|
||||
@@ -771,22 +763,21 @@ class commonSettingsForm(Form):
|
||||
|
||||
|
||||
class importForm(Form):
|
||||
from . import processors
|
||||
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
|
||||
urls = TextAreaField('URLs')
|
||||
xlsx_file = FileField('Upload .xlsx file', validators=[FileAllowed(['xlsx'], 'Must be .xlsx file!')])
|
||||
file_mapping = SelectField('File mapping', [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
urls = TextAreaField(_l('URLs'))
|
||||
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
||||
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
|
||||
class SingleBrowserStep(Form):
|
||||
|
||||
operation = SelectField('Operation', [validators.Optional()], choices=browser_step_ui_config.keys())
|
||||
operation = SelectField(_l('Operation'), [validators.Optional()], choices=browser_step_ui_config.keys())
|
||||
|
||||
# maybe better to set some <script>var..
|
||||
selector = StringField('Selector', [validators.Optional()], render_kw={"placeholder": "CSS or xPath selector"})
|
||||
optional_value = StringField('value', [validators.Optional()], render_kw={"placeholder": "Value"})
|
||||
selector = StringField(_l('Selector'), [validators.Optional()], render_kw={"placeholder": "CSS or xPath selector"})
|
||||
optional_value = StringField(_l('value'), [validators.Optional()], render_kw={"placeholder": "Value"})
|
||||
# @todo move to JS? ajax fetch new field?
|
||||
# remove_button = SubmitField('-', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
|
||||
# add_button = SubmitField('+', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})
|
||||
# remove_button = SubmitField(_l('-'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
|
||||
# add_button = SubmitField(_l('+'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})
|
||||
|
||||
class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
@@ -802,49 +793,48 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
time_schedule_limit = FormField(ScheduleLimitForm)
|
||||
|
||||
time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False)
|
||||
time_between_check_use_default = BooleanField(_l('Use global settings for time between check and scheduler.'), default=False)
|
||||
|
||||
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
|
||||
include_filters = StringListField(_l('CSS/JSONPath/JQ/XPath Filters'), [ValidateCSSJSONXPATHInput()], default='')
|
||||
|
||||
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
|
||||
extract_text = StringListField('Extract text', [ValidateListRegex()])
|
||||
extract_text = StringListField(_l('Extract text'), [ValidateListRegex()])
|
||||
|
||||
title = StringField('Title', default='')
|
||||
title = StringField(_l('Title'), default='')
|
||||
|
||||
ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()])
|
||||
ignore_text = StringListField(_l('Ignore lines containing'), [ValidateListRegex()])
|
||||
headers = StringDictKeyValue('Request headers')
|
||||
body = TextAreaField('Request body', [validators.Optional()])
|
||||
method = SelectField('Request method', choices=valid_method, default=default_method)
|
||||
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
|
||||
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
|
||||
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
|
||||
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
|
||||
strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None)
|
||||
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
|
||||
body = TextAreaField(_l('Request body'), [validators.Optional()])
|
||||
method = SelectField(_l('Request method'), choices=valid_method, default=default_method)
|
||||
ignore_status_codes = BooleanField(_l('Ignore status codes (process non-2xx status codes as normal)'), default=False)
|
||||
check_unique_lines = BooleanField(_l('Only trigger when unique lines appear in all history'), default=False)
|
||||
remove_duplicate_lines = BooleanField(_l('Remove duplicate lines of text'), default=False)
|
||||
sort_text_alphabetically = BooleanField(_l('Sort text alphabetically'), default=False)
|
||||
strip_ignored_lines = TernaryNoneBooleanField(_l('Strip ignored lines'), default=None)
|
||||
trim_text_whitespace = BooleanField(_l('Trim whitespace before and after text'), default=False)
|
||||
|
||||
filter_text_added = BooleanField('Added lines', default=True)
|
||||
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
|
||||
filter_text_removed = BooleanField('Removed lines', default=True)
|
||||
filter_text_added = BooleanField(_l('Added lines'), default=True)
|
||||
filter_text_replaced = BooleanField(_l('Replaced/changed lines'), default=True)
|
||||
filter_text_removed = BooleanField(_l('Removed lines'), default=True)
|
||||
|
||||
trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||
trigger_text = StringListField(_l('Keyword triggers - Trigger/wait for text'), [validators.Optional(), ValidateListRegex()])
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
|
||||
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
|
||||
text_should_not_be_present = StringListField(_l('Block change-detection while text matches'), [validators.Optional(), ValidateListRegex()])
|
||||
webdriver_js_execute_code = TextAreaField(_l('Execute JavaScript before change detection'), render_kw={"rows": "5"}, validators=[validators.Optional()])
|
||||
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
proxy = RadioField('Proxy')
|
||||
proxy = RadioField(_l('Proxy'))
|
||||
# filter_failure_notification_send @todo make ternary
|
||||
filter_failure_notification_send = BooleanField(
|
||||
'Send a notification when the filter can no longer be found on the page', default=False)
|
||||
notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On")
|
||||
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
|
||||
filter_failure_notification_send = BooleanField(_l('Send a notification when the filter can no longer be found on the page'), default=False)
|
||||
notification_muted = TernaryNoneBooleanField(_l('Notifications'), default=None, yes_text=_l("Muted"), no_text=_l("On"))
|
||||
notification_screenshot = BooleanField(_l('Attach screenshot to notification (where possible)'), default=False)
|
||||
|
||||
conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL')
|
||||
conditions_match_logic = RadioField(_l('Match'), choices=[('ALL', _l('Match all of the following')),('ANY', _l('Match any of the following'))], default='ALL')
|
||||
conditions = FieldList(FormField(ConditionFormRow), min_entries=1) # Add rule logic here
|
||||
use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None)
|
||||
use_page_title_in_list = TernaryNoneBooleanField(_l('Use page <title> in list'), default=None)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return None
|
||||
@@ -861,7 +851,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
# Fail form validation when a body is set for a GET
|
||||
if self.method.data == 'GET' and self.body.data:
|
||||
self.body.errors.append('Body must be empty when Request Method is set to GET')
|
||||
self.body.errors.append(gettext('Body must be empty when Request Method is set to GET'))
|
||||
result = False
|
||||
|
||||
# Attempt to validate jinja2 templates in the URL
|
||||
@@ -870,11 +860,11 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
except ModuleNotFoundError as e:
|
||||
# incase jinja2_time or others is missing
|
||||
logger.error(e)
|
||||
self.url.errors.append(f'Invalid template syntax configuration: {e}')
|
||||
self.url.errors.append(gettext('Invalid template syntax configuration: %(error)s') % {'error': e})
|
||||
result = False
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
self.url.errors.append(f'Invalid template syntax: {e}')
|
||||
self.url.errors.append(gettext('Invalid template syntax: %(error)s') % {'error': e})
|
||||
result = False
|
||||
|
||||
# Attempt to validate jinja2 templates in the body
|
||||
@@ -884,11 +874,11 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
except ModuleNotFoundError as e:
|
||||
# incase jinja2_time or others is missing
|
||||
logger.error(e)
|
||||
self.body.errors.append(f'Invalid template syntax configuration: {e}')
|
||||
self.body.errors.append(gettext('Invalid template syntax configuration: %(error)s') % {'error': e})
|
||||
result = False
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
self.body.errors.append(f'Invalid template syntax: {e}')
|
||||
self.body.errors.append(gettext('Invalid template syntax: %(error)s') % {'error': e})
|
||||
result = False
|
||||
|
||||
# Attempt to validate jinja2 templates in the headers
|
||||
@@ -899,11 +889,11 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
except ModuleNotFoundError as e:
|
||||
# incase jinja2_time or others is missing
|
||||
logger.error(e)
|
||||
self.headers.errors.append(f'Invalid template syntax configuration: {e}')
|
||||
self.headers.errors.append(gettext('Invalid template syntax configuration: %(error)s') % {'error': e})
|
||||
result = False
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
self.headers.errors.append(f'Invalid template syntax in "{header}" header: {e}')
|
||||
self.headers.errors.append(gettext('Invalid template syntax in \"%(header)s\" header: %(error)s') % {'header': header, 'error': e})
|
||||
result = False
|
||||
|
||||
return result
|
||||
@@ -927,106 +917,122 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
class SingleExtraProxy(Form):
|
||||
# maybe better to set some <script>var..
|
||||
proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
proxy_url = StringField('Proxy URL', [
|
||||
proxy_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
proxy_url = StringField(_l('Proxy URL'), [
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
regex=r'^(https?|socks5)://', # ✅ main pattern
|
||||
flags=re.IGNORECASE, # ✅ makes it case-insensitive
|
||||
message='Proxy URLs must start with http://, https:// or socks5://',
|
||||
message=_l('Proxy URLs must start with http://, https:// or socks5://'),
|
||||
),
|
||||
ValidateSimpleURL()
|
||||
], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
|
||||
|
||||
class SingleExtraBrowser(Form):
|
||||
browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
browser_connection_url = StringField('Browser connection URL', [
|
||||
browser_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
browser_connection_url = StringField(_l('Browser connection URL'), [
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
regex=r'^(wss?|ws)://',
|
||||
flags=re.IGNORECASE,
|
||||
message='Browser URLs must start with wss:// or ws://'
|
||||
message=_l('Browser URLs must start with wss:// or ws://')
|
||||
),
|
||||
ValidateSimpleURL()
|
||||
], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
|
||||
|
||||
class DefaultUAInputForm(Form):
|
||||
html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
html_requests = StringField(_l('Plaintext requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"):
|
||||
html_webdriver = StringField('Chrome requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
html_webdriver = StringField(_l('Chrome requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
|
||||
# datastore.data['settings']['requests']..
|
||||
class globalSettingsRequestForm(Form):
|
||||
time_between_check = RequiredFormField(TimeBetweenCheckForm)
|
||||
time_schedule_limit = FormField(ScheduleLimitForm)
|
||||
proxy = RadioField('Default proxy')
|
||||
jitter_seconds = IntegerField('Random jitter seconds ± check',
|
||||
proxy = RadioField(_l('Default proxy'))
|
||||
jitter_seconds = IntegerField(_l('Random jitter seconds ± check'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
validators=[validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
|
||||
workers = IntegerField('Number of fetch workers',
|
||||
workers = IntegerField(_l('Number of fetch workers'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=1, max=50,
|
||||
message="Should be between 1 and 50")])
|
||||
message=_l("Should be between 1 and 50"))])
|
||||
|
||||
timeout = IntegerField('Requests timeout in seconds',
|
||||
timeout = IntegerField(_l('Requests timeout in seconds'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=1, max=999,
|
||||
message="Should be between 1 and 999")])
|
||||
message=_l("Should be between 1 and 999"))])
|
||||
|
||||
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
|
||||
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
|
||||
|
||||
default_ua = FormField(DefaultUAInputForm, label="Default User-Agent overrides")
|
||||
default_ua = FormField(DefaultUAInputForm, label=_l("Default User-Agent overrides"))
|
||||
|
||||
def validate_extra_proxies(self, extra_validators=None):
|
||||
for e in self.data['extra_proxies']:
|
||||
if e.get('proxy_name') or e.get('proxy_url'):
|
||||
if not e.get('proxy_name','').strip() or not e.get('proxy_url','').strip():
|
||||
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
|
||||
self.extra_proxies.errors.append(gettext('Both a name, and a Proxy URL is required.'))
|
||||
return False
|
||||
|
||||
class globalSettingsApplicationUIForm(Form):
|
||||
open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()])
|
||||
socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
|
||||
favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
|
||||
use_page_title_in_list = BooleanField('Use page <title> in watch overview list') #BooleanField=True
|
||||
open_diff_in_new_tab = BooleanField(_l("Open 'History' page in a new tab"), default=True, validators=[validators.Optional()])
|
||||
socket_io_enabled = BooleanField(_l('Realtime UI Updates Enabled'), default=True, validators=[validators.Optional()])
|
||||
favicons_enabled = BooleanField(_l('Favicons Enabled'), default=True, validators=[validators.Optional()])
|
||||
use_page_title_in_list = BooleanField(_l('Use page <title> in watch overview list')) #BooleanField=True
|
||||
|
||||
# datastore.data['settings']['application']..
|
||||
class globalSettingsApplicationForm(commonSettingsForm):
|
||||
|
||||
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
|
||||
base_url = StringField('Notification base URL override',
|
||||
api_access_token_enabled = BooleanField(_l('API access token security check enabled'), default=True, validators=[validators.Optional()])
|
||||
base_url = StringField(_l('Notification base URL override'),
|
||||
validators=[validators.Optional()],
|
||||
render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
|
||||
)
|
||||
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
|
||||
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
ignore_whitespace = BooleanField('Ignore whitespace')
|
||||
empty_pages_are_a_change = BooleanField(_l('Treat empty pages as a change?'), default=False)
|
||||
fetch_backend = RadioField(_l('Fetch Method'), default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
global_ignore_text = StringListField(_l('Ignore Text'), [ValidateListRegex()])
|
||||
global_subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
ignore_whitespace = BooleanField(_l('Ignore whitespace'))
|
||||
|
||||
# Screenshot comparison settings
|
||||
min_change_percentage = FloatField(
|
||||
'Screenshot: Minimum Change Percentage',
|
||||
validators=[
|
||||
validators.Optional(),
|
||||
validators.NumberRange(min=0.0, max=100.0, message=_l('Must be between 0 and 100'))
|
||||
],
|
||||
default=0.1,
|
||||
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
|
||||
)
|
||||
|
||||
password = SaltyPasswordField()
|
||||
pager_size = IntegerField('Pager size',
|
||||
pager_size = IntegerField(_l('Pager size'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
message="Should be atleast zero (disabled)")])
|
||||
message=_l("Should be atleast zero (disabled)"))])
|
||||
|
||||
rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES)
|
||||
rss_content_format = SelectField(_l('RSS Content format'), choices=list(RSS_FORMAT_TYPES.items()))
|
||||
rss_template_type = SelectField(_l('RSS <description> body built from'), choices=list(RSS_TEMPLATE_TYPE_OPTIONS.items()))
|
||||
rss_template_override = TextAreaField(_l('RSS "System default" template override'), render_kw={"rows": "5", "placeholder": RSS_TEMPLATE_HTML_DEFAULT}, validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
|
||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
||||
shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()])
|
||||
strip_ignored_lines = BooleanField('Strip ignored lines')
|
||||
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
|
||||
removepassword_button = SubmitField(_l('Remove password'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
render_anchor_tag_content = BooleanField(_l('Render anchor tag content'), default=False)
|
||||
shared_diff_access = BooleanField(_l('Allow anonymous access to watch history page when password is enabled'), default=False, validators=[validators.Optional()])
|
||||
strip_ignored_lines = BooleanField(_l('Strip ignored lines'))
|
||||
rss_hide_muted_watches = BooleanField(_l('Hide muted watches from RSS feed'), default=True,
|
||||
validators=[validators.Optional()])
|
||||
|
||||
rss_reader_mode = BooleanField('RSS reader mode ', default=False,
|
||||
validators=[validators.Optional()])
|
||||
rss_reader_mode = BooleanField(_l('Enable RSS reader mode '), default=False, validators=[validators.Optional()])
|
||||
rss_diff_length = IntegerField(label=_l('Number of changes to show in watch RSS feed'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0, message=_l("Should contain zero or more attempts"))])
|
||||
|
||||
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
|
||||
filter_failure_notification_threshold_attempts = IntegerField(_l('Number of times the filter can be missing before sending a notification'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
message="Should contain zero or more attempts")])
|
||||
message=_l("Should contain zero or more attempts"))])
|
||||
ui = FormField(globalSettingsApplicationUIForm)
|
||||
|
||||
|
||||
@@ -1042,9 +1048,9 @@ class globalSettingsForm(Form):
|
||||
|
||||
requests = FormField(globalSettingsRequestForm)
|
||||
application = FormField(globalSettingsApplicationForm)
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
class extractDataForm(Form):
|
||||
extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
|
||||
extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})
|
||||
extract_regex = StringField(_l('RegEx to extract'), validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
|
||||
extract_submit_button = SubmitField(_l('Extract as CSV'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from functools import lru_cache
|
||||
|
||||
from loguru import logger
|
||||
from typing import List
|
||||
import html
|
||||
@@ -13,7 +15,6 @@ TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
|
||||
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
|
||||
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
|
||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
||||
# All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
|
||||
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
|
||||
@@ -22,9 +23,9 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
def perl_style_slash_enclosed_regex_to_options(regex):
|
||||
|
||||
res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE)
|
||||
@@ -171,99 +172,131 @@ def elementpath_tostring(obj):
|
||||
return str(obj)
|
||||
|
||||
# Return str Utf-8 of matched rules
|
||||
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
|
||||
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_xml=False):
|
||||
"""
|
||||
|
||||
:param xpath_filter:
|
||||
:param html_content:
|
||||
:param append_pretty_line_formatting:
|
||||
:param is_xml: set to true if is XML or is RSS (RSS is XML)
|
||||
:return:
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
if is_rss:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
|
||||
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
|
||||
html_block = ""
|
||||
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML)
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
# Solution: Register the default namespace with empty string prefix in elementpath
|
||||
# This is primarily for RSS/Atom feeds but works for any XML with default namespace
|
||||
if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap:
|
||||
# Register the default namespace with empty string prefix for elementpath
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
if type(r) != list:
|
||||
r = [r]
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
if type(element) == str:
|
||||
html_block += element
|
||||
elif issubclass(type(element), etree._Element) or issubclass(type(element), etree._ElementTree):
|
||||
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
|
||||
tree = None
|
||||
try:
|
||||
if is_xml:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
|
||||
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
tree = html.fromstring(html_content, parser=parser)
|
||||
html_block = ""
|
||||
|
||||
return html_block
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML)
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
# Solution: Register the default namespace with empty string prefix in elementpath
|
||||
# This is primarily for RSS/Atom feeds but works for any XML with default namespace
|
||||
if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap:
|
||||
# Register the default namespace with empty string prefix for elementpath
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
if type(r) != list:
|
||||
r = [r]
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
if type(element) == str:
|
||||
html_block += element
|
||||
elif issubclass(type(element), etree._Element) or issubclass(type(element), etree._ElementTree):
|
||||
# Use 'xml' method for RSS/XML content, 'html' for HTML content
|
||||
# parser will be XMLParser if we detected XML content
|
||||
method = 'xml' if (is_xml or isinstance(parser, etree.XMLParser)) else 'html'
|
||||
html_block += etree.tostring(element, pretty_print=True, method=method, encoding='unicode')
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
# lxml trees can hold significant memory, especially with large documents
|
||||
if tree is not None:
|
||||
tree.clear()
|
||||
|
||||
# Return str Utf-8 of matched rules
|
||||
# 'xpath1:'
|
||||
def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
|
||||
def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_xml=False):
|
||||
from lxml import etree, html
|
||||
|
||||
parser = None
|
||||
if is_rss:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
|
||||
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
|
||||
html_block = ""
|
||||
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace
|
||||
# For documents with default namespace (RSS/Atom feeds), users must use:
|
||||
# - local-name(): //*[local-name()='title']/text()
|
||||
# - Or use xpath_filter (not xpath1_filter) which supports default namespaces
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
|
||||
r = tree.xpath(xpath_filter.strip(), namespaces=namespaces)
|
||||
#@note: xpath1 (lxml) does NOT automatically handle default namespaces
|
||||
#@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
# Some kind of text, UTF-8 or other
|
||||
if isinstance(element, (str, bytes)):
|
||||
html_block += element
|
||||
tree = None
|
||||
try:
|
||||
if is_xml:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
|
||||
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
|
||||
else:
|
||||
# Return the HTML which will get parsed as text
|
||||
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
|
||||
tree = html.fromstring(html_content, parser=parser)
|
||||
html_block = ""
|
||||
|
||||
return html_block
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace
|
||||
# For documents with default namespace (RSS/Atom feeds), users must use:
|
||||
# - local-name(): //*[local-name()='title']/text()
|
||||
# - Or use xpath_filter (not xpath1_filter) which supports default namespaces
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
|
||||
r = tree.xpath(xpath_filter.strip(), namespaces=namespaces)
|
||||
#@note: xpath1 (lxml) does NOT automatically handle default namespaces
|
||||
#@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
# Some kind of text, UTF-8 or other
|
||||
if isinstance(element, (str, bytes)):
|
||||
html_block += element
|
||||
else:
|
||||
# Return the HTML/XML which will get parsed as text
|
||||
# Use 'xml' method for RSS/XML content, 'html' for HTML content
|
||||
# parser will be XMLParser if we detected XML content
|
||||
method = 'xml' if (is_xml or isinstance(parser, etree.XMLParser)) else 'html'
|
||||
html_block += etree.tostring(element, pretty_print=True, method=method, encoding='unicode')
|
||||
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
# lxml trees can hold significant memory, especially with large documents
|
||||
if tree is not None:
|
||||
tree.clear()
|
||||
|
||||
# Extract/find element
|
||||
def extract_element(find='title', html_content=''):
|
||||
@@ -431,6 +464,9 @@ def strip_ignore_text(content, wordlist, mode="content"):
|
||||
ignore_regex_multiline = []
|
||||
ignored_lines = []
|
||||
|
||||
if not content:
|
||||
return ''
|
||||
|
||||
for k in wordlist:
|
||||
# Skip empty strings to avoid matching everything
|
||||
if not k or not k.strip():
|
||||
|
||||
103
changedetectionio/languages.py
Normal file
103
changedetectionio/languages.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Language configuration for i18n support
|
||||
Automatically discovers available languages from translations directory
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_timeago_locale(flask_locale):
|
||||
"""
|
||||
Convert Flask-Babel locale codes to timeago library locale codes.
|
||||
|
||||
The Python timeago library (https://github.com/hustcc/timeago) supports 48 locales
|
||||
but uses different naming conventions than Flask-Babel. This function maps between them.
|
||||
|
||||
Notable differences:
|
||||
- Chinese: Flask uses 'zh', timeago uses 'zh_CN'
|
||||
- Portuguese: Flask uses 'pt', timeago uses 'pt_PT' or 'pt_BR'
|
||||
- Swedish: Flask uses 'sv', timeago uses 'sv_SE'
|
||||
- Norwegian: Flask uses 'no', timeago uses 'nb_NO' or 'nn_NO'
|
||||
- Hindi: Flask uses 'hi', timeago uses 'in_HI'
|
||||
- Czech: Flask uses 'cs', but timeago doesn't support Czech - fallback to English
|
||||
|
||||
Args:
|
||||
flask_locale (str): Flask-Babel locale code (e.g., 'cs', 'zh', 'pt')
|
||||
|
||||
Returns:
|
||||
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
|
||||
"""
|
||||
locale_map = {
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
}
|
||||
return locale_map.get(flask_locale, flask_locale)
|
||||
|
||||
# Language metadata: flag icon CSS class and native name
|
||||
# Using flag-icons library: https://flagicons.lipis.dev/
|
||||
LANGUAGE_DATA = {
|
||||
'en': {'flag': 'fi fi-gb fis', 'name': 'English'},
|
||||
'de': {'flag': 'fi fi-de fis', 'name': 'Deutsch'},
|
||||
'fr': {'flag': 'fi fi-fr fis', 'name': 'Français'},
|
||||
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
|
||||
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
|
||||
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
'zh_TW': {'flag': 'fi fi-tw fis', 'name': '繁體中文'},
|
||||
'ru': {'flag': 'fi fi-ru fis', 'name': 'Русский'},
|
||||
'pl': {'flag': 'fi fi-pl fis', 'name': 'Polski'},
|
||||
'nl': {'flag': 'fi fi-nl fis', 'name': 'Nederlands'},
|
||||
'sv': {'flag': 'fi fi-se fis', 'name': 'Svenska'},
|
||||
'da': {'flag': 'fi fi-dk fis', 'name': 'Dansk'},
|
||||
'no': {'flag': 'fi fi-no fis', 'name': 'Norsk'},
|
||||
'fi': {'flag': 'fi fi-fi fis', 'name': 'Suomi'},
|
||||
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
||||
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
||||
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
||||
}
|
||||
|
||||
|
||||
def get_available_languages():
|
||||
"""
|
||||
Discover available languages by scanning the translations directory
|
||||
Returns a dict of available languages with their metadata
|
||||
"""
|
||||
translations_dir = Path(__file__).parent / 'translations'
|
||||
|
||||
# Always include English as base language
|
||||
available = {
|
||||
'en': LANGUAGE_DATA['en']
|
||||
}
|
||||
|
||||
# Scan for translation directories
|
||||
if translations_dir.exists():
|
||||
for lang_dir in translations_dir.iterdir():
|
||||
if lang_dir.is_dir() and lang_dir.name in LANGUAGE_DATA:
|
||||
# Check if messages.po exists
|
||||
po_file = lang_dir / 'LC_MESSAGES' / 'messages.po'
|
||||
if po_file.exists():
|
||||
available[lang_dir.name] = LANGUAGE_DATA[lang_dir.name]
|
||||
|
||||
return available
|
||||
|
||||
|
||||
def get_language_codes():
|
||||
"""Get list of available language codes"""
|
||||
return list(get_available_languages().keys())
|
||||
|
||||
|
||||
def get_flag_for_locale(locale):
|
||||
"""Get flag emoji for a locale, or globe if unknown"""
|
||||
return LANGUAGE_DATA.get(locale, {}).get('flag', '🌐')
|
||||
|
||||
|
||||
def get_name_for_locale(locale):
|
||||
"""Get native name for a locale"""
|
||||
return LANGUAGE_DATA.get(locale, {}).get('name', locale.upper())
|
||||
@@ -1,6 +1,7 @@
|
||||
from os import getenv
|
||||
from copy import deepcopy
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
|
||||
|
||||
from changedetectionio.notification import (
|
||||
default_notification_body,
|
||||
@@ -45,6 +46,7 @@ class model(dict):
|
||||
'global_subtractive_selectors': [],
|
||||
'ignore_whitespace': True,
|
||||
'ignore_status_codes': False, #@todo implement, as ternary.
|
||||
'ssim_threshold': '0.96', # Default SSIM threshold for screenshot comparison
|
||||
'notification_body': default_notification_body,
|
||||
'notification_format': default_notification_format,
|
||||
'notification_title': default_notification_title,
|
||||
@@ -53,7 +55,10 @@ class model(dict):
|
||||
'password': False,
|
||||
'render_anchor_tag_content': False,
|
||||
'rss_access_token': None,
|
||||
'rss_content_format': RSS_FORMAT_TYPES[0][0],
|
||||
'rss_content_format': RSS_CONTENT_FORMAT_DEFAULT,
|
||||
'rss_template_type': 'system_default',
|
||||
'rss_template_override': None,
|
||||
'rss_diff_length': 5,
|
||||
'rss_hide_muted_watches': True,
|
||||
'rss_reader_mode': False,
|
||||
'scheduler_timezone_default': None, # Default IANA timezone name
|
||||
@@ -74,12 +79,13 @@ class model(dict):
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
self.update(self.base_config)
|
||||
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
|
||||
self.update(deepcopy(self.base_config))
|
||||
|
||||
|
||||
def parse_headers_from_text_file(filepath):
|
||||
headers = {}
|
||||
with open(filepath, 'r') as f:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
for l in f.readlines():
|
||||
l = l.strip()
|
||||
if not l.startswith('#') and ':' in l:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from blinker import signal
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
@@ -12,32 +13,143 @@ from .. import jinja2_custom as safe_jinja
|
||||
from ..diff import ADDED_PLACEMARKER_OPEN
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
|
||||
# Allowable protocols, protects against javascript: etc
|
||||
# file:// is further checked by ALLOW_FILE_URI
|
||||
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
|
||||
|
||||
def _brotli_compress_worker(conn, filepath, mode=None):
|
||||
"""
|
||||
Worker function to compress data with brotli in a separate process.
|
||||
This isolates memory - when process exits, OS reclaims all memory.
|
||||
|
||||
Args:
|
||||
conn: multiprocessing.Pipe connection to receive data
|
||||
filepath: destination file path
|
||||
mode: brotli compression mode (e.g., brotli.MODE_TEXT)
|
||||
"""
|
||||
import brotli
|
||||
|
||||
try:
|
||||
# Receive data from parent process via pipe (avoids pickle overhead)
|
||||
contents = conn.recv()
|
||||
|
||||
if mode is not None:
|
||||
compressed_data = brotli.compress(contents, mode=mode)
|
||||
else:
|
||||
compressed_data = brotli.compress(contents)
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(compressed_data)
|
||||
|
||||
# Send success status back
|
||||
conn.send(True)
|
||||
# No need for explicit cleanup - process exit frees all memory
|
||||
except Exception as e:
|
||||
logger.error(f"Brotli compression worker failed: {e}")
|
||||
conn.send(False)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def _brotli_subprocess_save(contents, filepath, mode=None, timeout=30, fallback_uncompressed=False):
|
||||
"""
|
||||
Save compressed data using subprocess to isolate memory.
|
||||
Uses Pipe to avoid pickle overhead for large data.
|
||||
|
||||
Args:
|
||||
contents: data to compress (str or bytes)
|
||||
filepath: destination file path
|
||||
mode: brotli compression mode (e.g., brotli.MODE_TEXT)
|
||||
timeout: subprocess timeout in seconds
|
||||
fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception
|
||||
|
||||
Returns:
|
||||
str: actual filepath saved (may differ from input if fallback used)
|
||||
|
||||
Raises:
|
||||
Exception: if compression fails and fallback_uncompressed is False
|
||||
"""
|
||||
import brotli
|
||||
import multiprocessing
|
||||
import sys
|
||||
|
||||
# Ensure contents are bytes
|
||||
if isinstance(contents, str):
|
||||
contents = contents.encode('utf-8')
|
||||
|
||||
# Use explicit spawn context for thread safety (avoids fork() with multi-threaded parent)
|
||||
# Always use spawn - consistent behavior in tests and production
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
# Run compression in subprocess using spawn (not fork)
|
||||
proc = ctx.Process(target=_brotli_compress_worker, args=(child_conn, filepath, mode))
|
||||
|
||||
# Windows-safe: Set daemon=False explicitly to avoid issues with process cleanup
|
||||
proc.daemon = False
|
||||
proc.start()
|
||||
|
||||
try:
|
||||
# Send data to subprocess via pipe (avoids pickle)
|
||||
parent_conn.send(contents)
|
||||
|
||||
# Wait for result with timeout
|
||||
if parent_conn.poll(timeout):
|
||||
success = parent_conn.recv()
|
||||
else:
|
||||
success = False
|
||||
logger.warning(f"Brotli compression subprocess timed out after {timeout}s")
|
||||
# Graceful termination with platform-aware cleanup
|
||||
try:
|
||||
proc.terminate()
|
||||
except Exception as term_error:
|
||||
logger.debug(f"Process termination issue (may be normal on Windows): {term_error}")
|
||||
|
||||
parent_conn.close()
|
||||
proc.join(timeout=5)
|
||||
|
||||
# Force kill if still alive after graceful termination
|
||||
if proc.is_alive():
|
||||
try:
|
||||
if sys.platform == 'win32':
|
||||
# Windows: use kill() which is more forceful
|
||||
proc.kill()
|
||||
else:
|
||||
# Unix: terminate() already sent SIGTERM, now try SIGKILL
|
||||
proc.kill()
|
||||
proc.join(timeout=2)
|
||||
except Exception as kill_error:
|
||||
logger.warning(f"Failed to kill brotli compression process: {kill_error}")
|
||||
|
||||
# Check if file was created successfully
|
||||
if success and os.path.exists(filepath):
|
||||
return filepath
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Brotli compression error: {e}")
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
proc.terminate()
|
||||
proc.join(timeout=2)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Compression failed
|
||||
if fallback_uncompressed:
|
||||
logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
|
||||
fallback_path = filepath.replace('.br', '')
|
||||
with open(fallback_path, 'wb') as f:
|
||||
f.write(contents)
|
||||
return fallback_path
|
||||
else:
|
||||
raise Exception(f"Brotli compression subprocess failed for {filepath}")
|
||||
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
|
||||
def is_safe_url(test_url):
|
||||
# See https://github.com/dgtlmoon/changedetection.io/issues/1358
|
||||
|
||||
# Remove 'source:' prefix so we dont get 'source:javascript:' etc
|
||||
# 'source:' is a valid way to tell us to return the source
|
||||
|
||||
r = re.compile(re.escape('source:'), re.IGNORECASE)
|
||||
test_url = r.sub('', test_url)
|
||||
|
||||
pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
|
||||
if not pattern.match(test_url.strip()):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
class model(watch_base):
|
||||
__newest_history_key = None
|
||||
__history_n = 0
|
||||
@@ -80,7 +192,7 @@ class model(watch_base):
|
||||
def link(self):
|
||||
|
||||
url = self.get('url', '')
|
||||
if not is_safe_url(url):
|
||||
if not is_safe_valid_url(url):
|
||||
return 'DISABLED'
|
||||
|
||||
ready_url = url
|
||||
@@ -101,7 +213,7 @@ class model(watch_base):
|
||||
ready_url=ready_url.replace('source:', '')
|
||||
|
||||
# Also double check it after any Jinja2 formatting just incase
|
||||
if not is_safe_url(ready_url):
|
||||
if not is_safe_valid_url(ready_url):
|
||||
return 'DISABLED'
|
||||
return ready_url
|
||||
|
||||
@@ -112,11 +224,29 @@ class model(watch_base):
|
||||
domain = parsed.hostname
|
||||
return domain
|
||||
|
||||
@property
|
||||
def history_index_filename(self):
|
||||
# So that you dont try to view different histories in different 'diff' setups, can confuse cdio.
|
||||
processor = self.get('processor')
|
||||
if not processor or self.get('processor') == 'text_json_diff':
|
||||
return 'history.txt'
|
||||
else:
|
||||
return f'history-{processor}.txt'
|
||||
|
||||
def clear_watch(self):
|
||||
import pathlib
|
||||
|
||||
# Get list of processor config files to preserve
|
||||
from changedetectionio.processors import find_processors
|
||||
processor_names = [name for cls, name in find_processors()]
|
||||
processor_config_files = {f"{name}.json" for name in processor_names}
|
||||
|
||||
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
|
||||
# But preserve processor config files (they're configuration, not history data)
|
||||
for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
|
||||
# Skip processor config files
|
||||
if item.name in processor_config_files:
|
||||
continue
|
||||
os.unlink(item)
|
||||
|
||||
# Force the attr to recalculate
|
||||
@@ -204,22 +334,25 @@ class model(watch_base):
|
||||
return []
|
||||
|
||||
# Read the history file as a dict
|
||||
fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
if os.path.isfile(fname):
|
||||
logger.debug(f"Reading watch history index for {self.get('uuid')}")
|
||||
with open(fname, "r") as f:
|
||||
with open(fname, "r", encoding='utf-8') as f:
|
||||
for i in f.readlines():
|
||||
if ',' in i:
|
||||
k, v = i.strip().split(',', 2)
|
||||
|
||||
# The index history could contain a relative path, so we need to make the fullpath
|
||||
# so that python can read it
|
||||
if not '/' in v and not '\'' in v:
|
||||
# Cross-platform: check for any path separator (works on Windows and Unix)
|
||||
if os.sep not in v and '/' not in v and '\\' not in v:
|
||||
# Relative filename only, no path separators
|
||||
v = os.path.join(self.watch_data_dir, v)
|
||||
else:
|
||||
# It's possible that they moved the datadir on older versions
|
||||
# So the snapshot exists but is in a different path
|
||||
snapshot_fname = v.split('/')[-1]
|
||||
# Cross-platform: use os.path.basename instead of split('/')
|
||||
snapshot_fname = os.path.basename(v)
|
||||
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(v) and os.path.exists(proposed_new_path):
|
||||
v = proposed_new_path
|
||||
@@ -237,7 +370,7 @@ class model(watch_base):
|
||||
|
||||
@property
|
||||
def has_history(self):
|
||||
fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
return os.path.isfile(fname)
|
||||
|
||||
@property
|
||||
@@ -295,65 +428,121 @@ class model(watch_base):
|
||||
# When the 'last viewed' timestamp is less than the oldest snapshot, return oldest
|
||||
return sorted_keys[-1]
|
||||
|
||||
def get_history_snapshot(self, timestamp):
|
||||
def get_history_snapshot(self, timestamp=None, filepath=None):
|
||||
"""
|
||||
Accepts either timestamp or filepath
|
||||
:param timestamp:
|
||||
:param filepath:
|
||||
:return:
|
||||
"""
|
||||
import brotli
|
||||
filepath = self.history[timestamp]
|
||||
|
||||
# See if a brotli versions exists and switch to that
|
||||
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
|
||||
filepath = f"{filepath}.br"
|
||||
if not filepath:
|
||||
filepath = self.history[timestamp]
|
||||
|
||||
# OR in the backup case that the .br does not exist, but the plain one does
|
||||
if filepath.endswith('.br') and not os.path.isfile(filepath):
|
||||
if os.path.isfile(filepath.replace('.br', '')):
|
||||
filepath = filepath.replace('.br', '')
|
||||
# Check if binary file (image, PDF, etc.)
|
||||
# Binary files are NEVER saved with .br compression, only text files are
|
||||
binary_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.pdf', '.bin', '.jfif')
|
||||
is_binary = any(filepath.endswith(ext) for ext in binary_extensions)
|
||||
|
||||
# Only look for .br versions for text files
|
||||
if not is_binary:
|
||||
# See if a brotli version exists and switch to that (text files only)
|
||||
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
|
||||
filepath = f"{filepath}.br"
|
||||
|
||||
# OR in the backup case that the .br does not exist, but the plain one does
|
||||
if filepath.endswith('.br') and not os.path.isfile(filepath):
|
||||
if os.path.isfile(filepath.replace('.br', '')):
|
||||
filepath = filepath.replace('.br', '')
|
||||
|
||||
# Handle .br compressed text files
|
||||
if filepath.endswith('.br'):
|
||||
# Brotli doesnt have a fileheader to detect it, so we rely on filename
|
||||
# https://www.rfc-editor.org/rfc/rfc7932
|
||||
# Note: .br should ONLY exist for text files, never binary
|
||||
with open(filepath, 'rb') as f:
|
||||
return(brotli.decompress(f.read()).decode('utf-8'))
|
||||
return brotli.decompress(f.read()).decode('utf-8')
|
||||
|
||||
# Binary file - return raw bytes
|
||||
if is_binary:
|
||||
with open(filepath, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
# Text file - decode to string
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
return f.read()
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
def _write_atomic(self, dest, data):
|
||||
"""Write data atomically to dest using a temp file"""
|
||||
if not os.path.exists(dest):
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.replace(tmp_path, dest)
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
def save_history_text(self, contents, timestamp, snapshot_id):
|
||||
import brotli
|
||||
import tempfile
|
||||
logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
|
||||
def save_history_blob(self, contents, timestamp, snapshot_id):
|
||||
|
||||
logger.trace(f"{self.get('uuid')} - Updating {self.history_index_filename} with timestamp {timestamp}")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||
|
||||
# Decide on snapshot filename and destination path
|
||||
if not skip_brotli and len(contents) > threshold:
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
|
||||
# Binary data - detect file type and save without compression
|
||||
if isinstance(contents, bytes):
|
||||
try:
|
||||
import puremagic
|
||||
detections = puremagic.magic_string(contents[:2048])
|
||||
ext = detections[0].extension if detections else 'bin'
|
||||
# Strip leading dot if present (puremagic returns extensions like '.jfif')
|
||||
ext = ext.lstrip('.')
|
||||
if detections:
|
||||
logger.trace(f"Detected file type: {detections[0].mime_type} -> extension: {ext}")
|
||||
except Exception as e:
|
||||
logger.warning(f"puremagic detection failed: {e}, using 'bin' extension")
|
||||
ext = 'bin'
|
||||
|
||||
snapshot_fname = f"{snapshot_id}.{ext}"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents)
|
||||
logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")
|
||||
|
||||
# Text data - use brotli compression if enabled and above threshold
|
||||
else:
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
encoded_data = contents.encode('utf-8')
|
||||
if not skip_brotli and len(contents) > threshold:
|
||||
# Compressed text
|
||||
import brotli
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
# Write snapshot file atomically if it doesn't exist
|
||||
if not os.path.exists(dest):
|
||||
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
|
||||
tmp.write(encoded_data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.rename(tmp_path, dest)
|
||||
if not os.path.exists(dest):
|
||||
try:
|
||||
actual_dest = _brotli_subprocess_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
|
||||
if actual_dest != dest:
|
||||
snapshot_fname = os.path.basename(actual_dest)
|
||||
except Exception as e:
|
||||
logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
|
||||
# Fallback to uncompressed
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
else:
|
||||
# Plain text
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
|
||||
# Append to history.txt atomically
|
||||
index_fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||
index_fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
index_line = f"{timestamp},{snapshot_fname}\n"
|
||||
|
||||
# Lets try force flush here since it's usually a very small file
|
||||
# If this still fails in the future then try reading all to memory first, re-writing etc
|
||||
with open(index_fname, 'a', encoding='utf-8') as f:
|
||||
f.write(index_line)
|
||||
f.flush()
|
||||
@@ -401,7 +590,7 @@ class model(watch_base):
|
||||
# Compare each lines (set) against each history text file (set) looking for something new..
|
||||
existing_history = set({})
|
||||
for k, v in self.history.items():
|
||||
content = self.get_history_snapshot(k)
|
||||
content = self.get_history_snapshot(filepath=v)
|
||||
|
||||
if ignore_whitespace:
|
||||
alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
|
||||
@@ -605,7 +794,7 @@ class model(watch_base):
|
||||
"""Return the text saved from a previous request that resulted in a non-200 error"""
|
||||
fname = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
if os.path.isfile(fname):
|
||||
with open(fname, 'r') as f:
|
||||
with open(fname, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
return False
|
||||
|
||||
@@ -658,7 +847,7 @@ class model(watch_base):
|
||||
for k, fname in self.history.items():
|
||||
if os.path.isfile(fname):
|
||||
if True:
|
||||
contents = self.get_history_snapshot(k)
|
||||
contents = self.get_history_snapshot(timestamp=k)
|
||||
res = re.findall(regex, contents, re.MULTILINE)
|
||||
if res:
|
||||
if not csv_writer:
|
||||
@@ -751,7 +940,7 @@ class model(watch_base):
|
||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||
dates = list(self.history.keys())
|
||||
if len(dates):
|
||||
return self.get_history_snapshot(dates[-1])
|
||||
return self.get_history_snapshot(timestamp=dates[-1])
|
||||
else:
|
||||
return ''
|
||||
|
||||
@@ -761,25 +950,13 @@ class model(watch_base):
|
||||
def save_last_text_fetched_before_filters(self, contents):
|
||||
import brotli
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
|
||||
_brotli_subprocess_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
|
||||
|
||||
def save_last_fetched_html(self, timestamp, contents):
|
||||
import brotli
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
contents = contents.encode('utf-8') if isinstance(contents, str) else contents
|
||||
try:
|
||||
f.write(brotli.compress(contents))
|
||||
except Exception as e:
|
||||
logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
|
||||
logger.warning(e)
|
||||
f.write(contents)
|
||||
|
||||
_brotli_subprocess_save(contents, filepath, mode=None, fallback_uncompressed=True)
|
||||
self._prune_last_fetched_html_snapshots()
|
||||
|
||||
def get_fetched_html(self, timestamp):
|
||||
@@ -837,6 +1014,7 @@ class model(watch_base):
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
if last_error:
|
||||
last_error = safe_jinja.render_fully_escaped(last_error)
|
||||
if '403' in last_error:
|
||||
if has_proxies:
|
||||
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a> '")))
|
||||
@@ -846,7 +1024,9 @@ class model(watch_base):
|
||||
output.append(str(Markup(last_error)))
|
||||
|
||||
if self.get('last_notification_error'):
|
||||
output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>")))
|
||||
txt = safe_jinja.render_fully_escaped(self.get('last_notification_error'))
|
||||
result = f'<div class="notification-error"><a href="{url_for("settings.notification_logs")}">{txt}</a></div>'
|
||||
output.append(result)
|
||||
|
||||
else:
|
||||
# Lo_Fi version - no app context, cant rely on Jinja2 Markup
|
||||
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import uuid
|
||||
|
||||
from changedetectionio import strtobool
|
||||
default_notification_format_for_watch = 'System default'
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
|
||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
||||
|
||||
class watch_base(dict):
|
||||
@@ -44,7 +44,7 @@ class watch_base(dict):
|
||||
'method': 'GET',
|
||||
'notification_alert_count': 0,
|
||||
'notification_body': None,
|
||||
'notification_format': default_notification_format_for_watch,
|
||||
'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
|
||||
'notification_muted': False,
|
||||
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
|
||||
'notification_title': None,
|
||||
|
||||
@@ -1,17 +1,16 @@
|
||||
from changedetectionio.model import default_notification_format_for_watch
|
||||
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
|
||||
default_notification_format = 'HTML Color'
|
||||
default_notification_format = 'htmlcolor'
|
||||
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
|
||||
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
|
||||
|
||||
# The values (markdown etc) are from apprise NotifyFormat,
|
||||
# But to avoid importing the whole heavy module just use the same strings here.
|
||||
valid_notification_formats = {
|
||||
'Plain Text': 'text',
|
||||
'HTML': 'html',
|
||||
'HTML Color': 'htmlcolor',
|
||||
'Markdown to HTML': 'markdown',
|
||||
'text': 'Plain Text',
|
||||
'html': 'HTML',
|
||||
'htmlcolor': 'HTML Color',
|
||||
'markdown': 'Markdown to HTML',
|
||||
# Used only for editing a watch (not for global)
|
||||
default_notification_format_for_watch: default_notification_format_for_watch
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +1,61 @@
|
||||
"""
|
||||
Custom Apprise HTTP Handlers with format= Parameter Support
|
||||
|
||||
IMPORTANT: This module works around a limitation in Apprise's @notify decorator.
|
||||
|
||||
THE PROBLEM:
|
||||
-------------
|
||||
When using Apprise's @notify decorator to create custom notification handlers, the
|
||||
decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse
|
||||
URLs. This simple parsing mode does NOT extract the format= query parameter from the URL
|
||||
and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format.
|
||||
|
||||
As a result:
|
||||
1. URL: post://example.com/webhook?format=html
|
||||
2. Apprise parses this and sees format=html in qsd (query string dictionary)
|
||||
3. But it does NOT extract it and pass it to NotifyBase.__init__
|
||||
4. NotifyBase defaults to notify_format=TEXT
|
||||
5. When you call apobj.notify(body="<html>...", body_format="html"):
|
||||
- Apprise sees: input format = html, output format (notify_format) = text
|
||||
- Apprise calls convert_between("html", "text", body)
|
||||
- This strips all HTML tags, leaving only plain text
|
||||
6. Your custom handler receives stripped plain text instead of HTML
|
||||
|
||||
THE SOLUTION:
|
||||
-------------
|
||||
Instead of using the @notify decorator directly, we:
|
||||
1. Manually register custom plugins using plugins.N_MGR.add()
|
||||
2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin
|
||||
3. Override __init__ to extract format= from qsd and set it as kwargs['format']
|
||||
4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format']
|
||||
5. Set up _default_args like CustomNotifyPlugin does for compatibility
|
||||
|
||||
This ensures that when format=html is in the URL:
|
||||
- notify_format is set to HTML
|
||||
- Apprise sees: input format = html, output format = html
|
||||
- No conversion happens (convert_between returns content unchanged)
|
||||
- Your custom handler receives the original HTML intact
|
||||
|
||||
TESTING:
|
||||
--------
|
||||
To verify this works:
|
||||
>>> apobj = apprise.Apprise()
|
||||
>>> apobj.add('post://localhost:5005/test?format=html')
|
||||
>>> for server in apobj:
|
||||
... print(server.notify_format) # Should print: html (not text)
|
||||
>>> apobj.notify(body='<span>Test</span>', body_format='html')
|
||||
# Your handler should receive '<span>Test</span>' not 'Test'
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from urllib.parse import unquote_plus
|
||||
|
||||
import requests
|
||||
from apprise.decorators import notify
|
||||
from apprise.utils.parse import parse_url as apprise_parse_url
|
||||
from apprise import plugins
|
||||
from apprise.decorators.base import CustomNotifyPlugin
|
||||
from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly
|
||||
from apprise.utils.logic import dict_full_update
|
||||
from loguru import logger
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
@@ -12,13 +63,66 @@ SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}
|
||||
|
||||
|
||||
def notify_supported_methods(func):
|
||||
"""Register custom HTTP method handlers that properly support format= parameter."""
|
||||
for method in SUPPORTED_HTTP_METHODS:
|
||||
func = notify(on=method)(func)
|
||||
# Add support for https, for each supported http method
|
||||
func = notify(on=f"{method}s")(func)
|
||||
_register_http_handler(method, func)
|
||||
_register_http_handler(f"{method}s", func)
|
||||
return func
|
||||
|
||||
|
||||
def _register_http_handler(schema, send_func):
|
||||
"""Register a custom HTTP handler that extracts format= from URL query parameters."""
|
||||
|
||||
# Parse base URL
|
||||
base_url = f"{schema}://"
|
||||
base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True)
|
||||
|
||||
class CustomHTTPHandler(CustomNotifyPlugin):
|
||||
secure_protocol = schema
|
||||
service_name = f"Custom HTTP - {schema.upper()}"
|
||||
_base_args = base_args
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
# Extract format from qsd and set it as a top-level kwarg
|
||||
# This allows NotifyBase.__init__ to properly set notify_format
|
||||
if 'qsd' in kwargs and 'format' in kwargs['qsd']:
|
||||
kwargs['format'] = kwargs['qsd']['format']
|
||||
|
||||
# Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__)
|
||||
super(CustomNotifyPlugin, self).__init__(**kwargs)
|
||||
|
||||
# Set up _default_args like CustomNotifyPlugin does
|
||||
self._default_args = {}
|
||||
kwargs.pop("secure", None)
|
||||
dict_full_update(self._default_args, self._base_args)
|
||||
dict_full_update(self._default_args, kwargs)
|
||||
self._default_args["url"] = url_assembly(**self._default_args)
|
||||
|
||||
__send = staticmethod(send_func)
|
||||
|
||||
def send(self, body, title="", notify_type="info", *args, **kwargs):
|
||||
"""Call the custom send function."""
|
||||
try:
|
||||
result = self.__send(
|
||||
body, title, notify_type,
|
||||
*args,
|
||||
meta=self._default_args,
|
||||
**kwargs
|
||||
)
|
||||
return True if result is None else bool(result)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Exception in custom HTTP handler: {e}")
|
||||
return False
|
||||
|
||||
# Register the plugin
|
||||
plugins.N_MGR.add(
|
||||
plugin=CustomHTTPHandler,
|
||||
schemas=schema,
|
||||
send_func=send_func,
|
||||
url=base_url,
|
||||
)
|
||||
|
||||
|
||||
def _get_auth(parsed_url: dict) -> str | tuple[str, str]:
|
||||
user: str | None = parsed_url.get("user")
|
||||
password: str | None = parsed_url.get("password")
|
||||
@@ -74,6 +178,8 @@ def apprise_http_custom_handler(
|
||||
*args,
|
||||
**kwargs,
|
||||
) -> bool:
|
||||
|
||||
|
||||
url: str = meta.get("url")
|
||||
schema: str = meta.get("schema")
|
||||
method: str = re.sub(r"s$", "", schema).upper()
|
||||
@@ -89,25 +195,16 @@ def apprise_http_custom_handler(
|
||||
|
||||
url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))
|
||||
|
||||
try:
|
||||
response = requests.request(
|
||||
method=method,
|
||||
url=url,
|
||||
auth=auth,
|
||||
headers=headers,
|
||||
params=params,
|
||||
data=body.encode("utf-8") if isinstance(body, str) else body,
|
||||
)
|
||||
response = requests.request(
|
||||
method=method,
|
||||
url=url,
|
||||
auth=auth,
|
||||
headers=headers,
|
||||
params=params,
|
||||
data=body.encode("utf-8") if isinstance(body, str) else body,
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
response.raise_for_status()
|
||||
|
||||
logger.info(f"Successfully sent custom notification to {url}")
|
||||
return True
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Remote host error while sending custom notification to {url}: {e}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
|
||||
return False
|
||||
logger.info(f"Successfully sent custom notification to {url}")
|
||||
return True
|
||||
|
||||
42
changedetectionio/notification/email_helpers.py
Normal file
42
changedetectionio/notification/email_helpers.py
Normal file
@@ -0,0 +1,42 @@
|
||||
def as_monospaced_html_email(content: str, title: str) -> str:
|
||||
"""
|
||||
Wraps `content` in a minimal, email-safe HTML template
|
||||
that forces monospace rendering across Gmail, Hotmail, Apple Mail, etc.
|
||||
|
||||
Args:
|
||||
content: The body text (plain text or HTML-like).
|
||||
title: The title plaintext
|
||||
Returns:
|
||||
A complete HTML document string suitable for sending as an email body.
|
||||
"""
|
||||
|
||||
# All line feed types should be removed and then this function should only be fed <br>'s
|
||||
# Then it works with our <pre> styling without double linefeeds
|
||||
content = content.translate(str.maketrans('', '', '\r\n'))
|
||||
|
||||
if title:
|
||||
import html
|
||||
title = html.escape(title)
|
||||
else:
|
||||
title = ''
|
||||
# 2. Full email-safe HTML
|
||||
html_email = f"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="x-apple-disable-message-reformatting">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<!--[if mso]>
|
||||
<style>
|
||||
body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }}
|
||||
</style>
|
||||
<![endif]-->
|
||||
<title>{title}</title>
|
||||
</head>
|
||||
<body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;">
|
||||
<pre role="article" aria-roledescription="email" lang="en"
|
||||
style="font-family: monospace, 'Courier New', Courier; font-size: 0.9rem;
|
||||
white-space: pre-wrap; word-break: break-word;">{content}</pre>
|
||||
</body>
|
||||
</html>"""
|
||||
return html_email
|
||||
@@ -1,16 +1,20 @@
|
||||
|
||||
import time
|
||||
import re
|
||||
import apprise
|
||||
from apprise import NotifyFormat
|
||||
from loguru import logger
|
||||
from urllib.parse import urlparse
|
||||
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
|
||||
from .apprise_plugin.custom_handlers import SUPPORTED_HTTP_METHODS
|
||||
from .email_helpers import as_monospaced_html_email
|
||||
from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \
|
||||
ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \
|
||||
CHANGED_PLACEMARKER_CLOSED
|
||||
from ..notification_service import NotificationContextData
|
||||
CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE
|
||||
import re
|
||||
|
||||
from ..notification_service import NotificationContextData, add_rendered_diff_to_notification_vars
|
||||
|
||||
newline_re = re.compile(r'\r\n|\r|\n')
|
||||
|
||||
def markup_text_links_to_html(body):
|
||||
"""
|
||||
@@ -61,13 +65,13 @@ def notification_format_align_with_apprise(n_format : str):
|
||||
:return:
|
||||
"""
|
||||
|
||||
if n_format.lower().startswith('html'):
|
||||
if n_format.startswith('html'):
|
||||
# Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here
|
||||
n_format = NotifyFormat.HTML.value
|
||||
elif n_format.lower().startswith('markdown'):
|
||||
elif n_format.startswith('markdown'):
|
||||
# probably the same but just to be safe
|
||||
n_format = NotifyFormat.MARKDOWN.value
|
||||
elif n_format.lower().startswith('text'):
|
||||
elif n_format.startswith('text'):
|
||||
# probably the same but just to be safe
|
||||
n_format = NotifyFormat.TEXT.value
|
||||
else:
|
||||
@@ -76,8 +80,133 @@ def notification_format_align_with_apprise(n_format : str):
|
||||
return n_format
|
||||
|
||||
|
||||
def apply_html_color_to_body(n_body: str):
|
||||
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN,
|
||||
f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN,
|
||||
f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
|
||||
# Handle changed/replaced lines (old → new)
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN,
|
||||
f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN,
|
||||
f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
|
||||
return n_body
|
||||
|
||||
def apply_discord_markdown_to_body(n_body):
|
||||
"""
|
||||
Discord does not support <del> but it supports non-standard ~~strikethrough~~
|
||||
:param n_body:
|
||||
:return:
|
||||
"""
|
||||
import re
|
||||
# Define the mapping between your placeholders and markdown markers
|
||||
replacements = [
|
||||
(REMOVED_PLACEMARKER_OPEN, '~~', REMOVED_PLACEMARKER_CLOSED, '~~'),
|
||||
(ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
|
||||
(CHANGED_PLACEMARKER_OPEN, '~~', CHANGED_PLACEMARKER_CLOSED, '~~'),
|
||||
(CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
|
||||
]
|
||||
# So that the markdown gets added without any whitespace following it which would break it
|
||||
for open_tag, open_md, close_tag, close_md in replacements:
|
||||
# Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
|
||||
pattern = re.compile(
|
||||
re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
|
||||
flags=re.DOTALL
|
||||
)
|
||||
n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
|
||||
return n_body
|
||||
|
||||
def apply_standard_markdown_to_body(n_body):
|
||||
"""
|
||||
Apprise does not support ~~strikethrough~~ but it will convert <del> to HTML strikethrough.
|
||||
:param n_body:
|
||||
:return:
|
||||
"""
|
||||
import re
|
||||
# Define the mapping between your placeholders and markdown markers
|
||||
replacements = [
|
||||
(REMOVED_PLACEMARKER_OPEN, '<del>', REMOVED_PLACEMARKER_CLOSED, '</del>'),
|
||||
(ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
|
||||
(CHANGED_PLACEMARKER_OPEN, '<del>', CHANGED_PLACEMARKER_CLOSED, '</del>'),
|
||||
(CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
|
||||
]
|
||||
|
||||
# So that the markdown gets added without any whitespace following it which would break it
|
||||
for open_tag, open_md, close_tag, close_md in replacements:
|
||||
# Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
|
||||
pattern = re.compile(
|
||||
re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
|
||||
flags=re.DOTALL
|
||||
)
|
||||
n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
|
||||
return n_body
|
||||
|
||||
|
||||
def replace_placemarkers_in_text(text, url, requested_output_format):
|
||||
"""
|
||||
Replace diff placemarkers in text based on the URL service type and requested output format.
|
||||
Used for both notification title and body to ensure consistent placeholder replacement.
|
||||
|
||||
:param text: The text to process
|
||||
:param url: The notification URL (to detect service type)
|
||||
:param requested_output_format: The output format (html, htmlcolor, markdown, text, etc.)
|
||||
:return: Processed text with placemarkers replaced
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
if url.startswith('tgram://'):
|
||||
# Telegram only supports a limited subset of HTML
|
||||
# Use strikethrough for removed content, bold for added content
|
||||
text = text.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
|
||||
text = text.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
|
||||
text = text.replace(ADDED_PLACEMARKER_OPEN, '<b>')
|
||||
text = text.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
|
||||
# Handle changed/replaced lines (old → new)
|
||||
text = text.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
|
||||
text = text.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
|
||||
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
|
||||
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')
|
||||
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
|
||||
or url.startswith('https://discord.com/api')) and requested_output_format == 'html':
|
||||
# Discord doesn't support HTML, use Discord markdown
|
||||
text = apply_discord_markdown_to_body(n_body=text)
|
||||
elif requested_output_format == 'htmlcolor':
|
||||
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
|
||||
text = text.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
|
||||
text = text.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
|
||||
text = text.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
|
||||
text = text.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
|
||||
# Handle changed/replaced lines (old → new)
|
||||
text = text.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
|
||||
text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
|
||||
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
|
||||
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
|
||||
elif requested_output_format == 'markdown':
|
||||
# Markdown to HTML - Apprise will convert this to HTML
|
||||
text = apply_standard_markdown_to_body(n_body=text)
|
||||
else:
|
||||
# plaintext, html, and default - use simple text markers
|
||||
text = text.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
|
||||
text = text.replace(REMOVED_PLACEMARKER_CLOSED, '')
|
||||
text = text.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
|
||||
text = text.replace(ADDED_PLACEMARKER_CLOSED, '')
|
||||
text = text.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
|
||||
text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'')
|
||||
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
|
||||
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
|
||||
|
||||
return text
|
||||
|
||||
def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
|
||||
logger.debug(f"Applying markup in '{requested_output_format}' mode")
|
||||
|
||||
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
|
||||
# Because different notifications may require different pre-processing, run each sequentially :(
|
||||
# 2000 bytes minus -
|
||||
@@ -87,6 +216,12 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
if not n_body or not n_body.strip():
|
||||
return url, n_body, n_title
|
||||
|
||||
# Normalize URL scheme to lowercase to prevent case-sensitivity issues
|
||||
# e.g., "Discord://webhook" -> "discord://webhook", "TGRAM://bot123" -> "tgram://bot123"
|
||||
scheme_separator_pos = url.find('://')
|
||||
if scheme_separator_pos > 0:
|
||||
url = url[:scheme_separator_pos].lower() + url[scheme_separator_pos:]
|
||||
|
||||
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
|
||||
parsed = urlparse(url)
|
||||
k = '?' if not parsed.query else '&'
|
||||
@@ -98,23 +233,22 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
and not url.startswith('put'):
|
||||
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
|
||||
|
||||
# Replace placemarkers in title first (this was the missing piece causing the bug)
|
||||
# Titles are ALWAYS plain text across all notification services (Discord embeds, Slack attachments,
|
||||
# email Subject headers, etc.), so we always use 'text' format for title placemarker replacement
|
||||
# Looking over apprise library it seems that all plugins only expect plain-text.
|
||||
n_title = replace_placemarkers_in_text(n_title, url, 'text')
|
||||
|
||||
if url.startswith('tgram://'):
|
||||
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
|
||||
# re https://github.com/dgtlmoon/changedetection.io/issues/555
|
||||
# @todo re-use an existing library we have already imported to strip all non-allowed tags
|
||||
n_body = n_body.replace('<br>', '\n')
|
||||
n_body = n_body.replace('</br>', '\n')
|
||||
n_body = newline_re.sub('\n', n_body)
|
||||
|
||||
# Use strikethrough for removed content, bold for added content
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '<b>')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
|
||||
# Handle changed/replaced lines (old → new)
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')
|
||||
# Replace placemarkers for body
|
||||
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
|
||||
|
||||
# real limit is 4096, but minus some for extra metadata
|
||||
payload_max_size = 3600
|
||||
@@ -128,6 +262,7 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
# Discord doesn't support HTML, replace <br> with newlines
|
||||
n_body = n_body.strip().replace('<br>', '\n')
|
||||
n_body = n_body.replace('</br>', '\n')
|
||||
n_body = newline_re.sub('\n', n_body)
|
||||
|
||||
# Don't replace placeholders or truncate here - let the custom Discord plugin handle it
|
||||
# The plugin will use embeds (6000 char limit across all embeds) if placeholders are present,
|
||||
@@ -137,15 +272,7 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
if requested_output_format == 'html':
|
||||
# No diff placeholders, use Discord markdown for any other formatting
|
||||
# Use Discord markdown: strikethrough for removed, bold for added
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '~~')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '~~')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '**')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '**')
|
||||
# Handle changed/replaced lines (old → new)
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, '~~')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, '~~')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, '**')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '**')
|
||||
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
|
||||
|
||||
# Apply 2000 char limit for plain content
|
||||
payload_max_size = 1700
|
||||
@@ -156,57 +283,29 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
|
||||
# Is not discord/tgram and they want htmlcolor
|
||||
elif requested_output_format == 'htmlcolor':
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}">')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}">')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
|
||||
# Handle changed/replaced lines (old → new)
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}">')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}">')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
|
||||
n_body = n_body.replace("\n", '<br>')
|
||||
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
|
||||
n_body = newline_re.sub('<br>\n', n_body)
|
||||
elif requested_output_format == 'html':
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
|
||||
n_body = n_body.replace("\n", '<br>')
|
||||
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
|
||||
n_body = newline_re.sub('<br>\n', n_body)
|
||||
elif requested_output_format == 'markdown':
|
||||
# Markdown to HTML - Apprise will convert this to HTML
|
||||
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
|
||||
|
||||
else: #plaintext etc default
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
|
||||
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
|
||||
|
||||
return url, n_body, n_title
|
||||
|
||||
|
||||
def process_notification(n_object: NotificationContextData, datastore):
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
|
||||
from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats
|
||||
# be sure its registered
|
||||
from .apprise_plugin.custom_handlers import apprise_http_custom_handler
|
||||
# Register custom Discord plugin
|
||||
from .apprise_plugin.discord import NotifyDiscordCustom
|
||||
|
||||
# Create list of custom handler protocols (both http and https versions)
|
||||
custom_handler_protocols = [f"{method}://" for method in SUPPORTED_HTTP_METHODS]
|
||||
custom_handler_protocols += [f"{method}s://" for method in SUPPORTED_HTTP_METHODS]
|
||||
|
||||
has_custom_handler = any(
|
||||
url.startswith(tuple(custom_handler_protocols))
|
||||
for url in n_object['notification_urls']
|
||||
)
|
||||
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
|
||||
|
||||
@@ -217,27 +316,21 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
# Insert variables into the notification content
|
||||
notification_parameters = create_notification_parameters(n_object, datastore)
|
||||
|
||||
requested_output_format = valid_notification_formats.get(
|
||||
n_object.get('notification_format', default_notification_format),
|
||||
valid_notification_formats[default_notification_format],
|
||||
)
|
||||
requested_output_format = n_object.get('notification_format', default_notification_format)
|
||||
logger.debug(f"Requested notification output format: '{requested_output_format}'")
|
||||
|
||||
# If we arrived with 'System default' then look it up
|
||||
if requested_output_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch:
|
||||
if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
|
||||
# Initially text or whatever
|
||||
requested_output_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format]).lower()
|
||||
requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format)
|
||||
|
||||
requested_output_format_original = requested_output_format
|
||||
|
||||
# Now clean it up so it fits perfectly with apprise
|
||||
requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format)
|
||||
|
||||
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s")
|
||||
|
||||
# If we have custom handlers, use invalid format to prevent conversion
|
||||
# Otherwise use the proper format
|
||||
if has_custom_handler:
|
||||
input_format = 'raw-no-convert'
|
||||
|
||||
# https://github.com/caronc/apprise/wiki/Development_LogCapture
|
||||
# Anything higher than or equal to WARNING (which covers things like Connection errors)
|
||||
# raise it as an exception
|
||||
@@ -258,74 +351,108 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
if not n_object.get('notification_urls'):
|
||||
return None
|
||||
|
||||
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
|
||||
n_object.update(add_rendered_diff_to_notification_vars(
|
||||
notification_scan_text=n_object.get('notification_body', '')+n_object.get('notification_title', ''),
|
||||
current_snapshot=n_object.get('current_snapshot'),
|
||||
prev_snapshot=n_object.get('prev_snapshot'),
|
||||
# Should always be false for 'text' mode or its too hard to read
|
||||
# But otherwise, this could be some setting
|
||||
word_diff=False if requested_output_format_original == 'text' else True,
|
||||
)
|
||||
)
|
||||
|
||||
with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
|
||||
for url in n_object['notification_urls']:
|
||||
parsed_url = urlparse(url)
|
||||
prefix_add_to_url = '?' if not parsed_url.query else '&'
|
||||
|
||||
# Get the notification body from datastore
|
||||
n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters)
|
||||
|
||||
if n_object.get('markup_text_to_html'):
|
||||
n_body = markup_text_links_to_html(body=n_body)
|
||||
|
||||
# This actually means we request "Markdown to HTML"
|
||||
if requested_output_format == NotifyFormat.MARKDOWN.value:
|
||||
output_format = NotifyFormat.HTML.value
|
||||
input_format = NotifyFormat.MARKDOWN.value
|
||||
if not 'format=' in url.lower():
|
||||
url = f"{url}{prefix_add_to_url}format={output_format}"
|
||||
|
||||
# Deviation from apprise.
|
||||
# No conversion, its like they want to send raw HTML but we add linebreaks
|
||||
elif requested_output_format == NotifyFormat.HTML.value:
|
||||
# same in and out means apprise wont try to convert
|
||||
input_format = output_format = NotifyFormat.HTML.value
|
||||
if not 'format=' in url.lower():
|
||||
url = f"{url}{prefix_add_to_url}format={output_format}"
|
||||
|
||||
else:
|
||||
# Nothing to be done, leave it as plaintext
|
||||
# `body_format` Tell apprise what format the INPUT is in
|
||||
# &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between)
|
||||
input_format = output_format = NotifyFormat.TEXT.value
|
||||
if not 'format=' in url.lower():
|
||||
url = f"{url}{prefix_add_to_url}format={output_format}"
|
||||
|
||||
if has_custom_handler:
|
||||
input_format='raw-no-convert'
|
||||
|
||||
n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)
|
||||
|
||||
if n_object.get('markup_text_links_to_html_links'):
|
||||
n_body = markup_text_links_to_html(body=n_body)
|
||||
|
||||
url = url.strip()
|
||||
if url.startswith('#'):
|
||||
logger.trace(f"Skipping commented out notification URL - {url}")
|
||||
if not url or url.startswith('#'):
|
||||
logger.debug(f"Skipping commented out or empty notification URL - '{url}'")
|
||||
continue
|
||||
|
||||
if not url:
|
||||
logger.warning(f"Process Notification: skipping empty notification URL.")
|
||||
continue
|
||||
|
||||
logger.info(f">> Process Notification: AppRise notifying {url}")
|
||||
logger.info(f">> Process Notification: AppRise start notifying '{url}'")
|
||||
url = jinja_render(template_str=url, **notification_parameters)
|
||||
|
||||
# If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped
|
||||
watch_mime_type = n_object.get('watch_mime_type')
|
||||
if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower():
|
||||
if 'html' in requested_output_format:
|
||||
from markupsafe import escape
|
||||
n_body = str(escape(n_body))
|
||||
|
||||
if 'html' in requested_output_format:
|
||||
# Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output
|
||||
# But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and this" etc which is too much.
|
||||
n_body = n_body.replace(' ', ' ')
|
||||
|
||||
(url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original)
|
||||
|
||||
apobj.add(url)
|
||||
apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS"
|
||||
|
||||
if not 'format=' in url:
|
||||
parsed_url = urlparse(url)
|
||||
prefix_add_to_url = '?' if not parsed_url.query else '&'
|
||||
|
||||
# THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC
|
||||
if 'html' in requested_output_format:
|
||||
url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
|
||||
apprise_input_format = NotifyFormat.HTML.value
|
||||
elif 'text' in requested_output_format:
|
||||
url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}"
|
||||
apprise_input_format = NotifyFormat.TEXT.value
|
||||
|
||||
elif requested_output_format == NotifyFormat.MARKDOWN.value:
|
||||
# Convert markdown to HTML ourselves since not all plugins do this
|
||||
from apprise.conversion import markdown_to_html
|
||||
# Make sure there are paragraph breaks around horizontal rules
|
||||
n_body = n_body.replace('---', '\n\n---\n\n')
|
||||
n_body = markdown_to_html(n_body)
|
||||
url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
|
||||
requested_output_format = NotifyFormat.HTML.value
|
||||
apprise_input_format = NotifyFormat.HTML.value # Changed from MARKDOWN to HTML
|
||||
|
||||
else:
|
||||
# ?format was IN the apprise URL, they are kind of on their own here, we will try our best
|
||||
if 'format=html' in url:
|
||||
n_body = newline_re.sub('<br>\r\n', n_body)
|
||||
# This will also prevent apprise from doing conversion
|
||||
apprise_input_format = NotifyFormat.HTML.value
|
||||
requested_output_format = NotifyFormat.HTML.value
|
||||
elif 'format=text' in url:
|
||||
apprise_input_format = NotifyFormat.TEXT.value
|
||||
requested_output_format = NotifyFormat.TEXT.value
|
||||
|
||||
#@todo on null:// (only if its a 1 url with null) probably doesnt need to actually .add/setup/etc
|
||||
sent_objs.append({'title': n_title,
|
||||
'body': n_body,
|
||||
'url': url})
|
||||
'url': url,
|
||||
# So that we can do a null:// call and get back exactly what would have been sent
|
||||
'original_context': n_object })
|
||||
|
||||
apobj.notify(
|
||||
title=n_title,
|
||||
body=n_body,
|
||||
# `body_format` Tell apprise what format the INPUT is in
|
||||
# &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between)
|
||||
body_format=input_format,
|
||||
# False is not an option for AppRise, must be type None
|
||||
attach=n_object.get('screenshot', None)
|
||||
)
|
||||
if not url.startswith('null://'):
|
||||
apobj.add(url)
|
||||
|
||||
# Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely.
|
||||
# It should always be similar to the 'history' part of the UI.
|
||||
if url.startswith('mail') and 'html' in requested_output_format:
|
||||
if not '<pre' in n_body and not '<body' in n_body: # No custom HTML-ish body was setup already
|
||||
n_body = as_monospaced_html_email(content=n_body, title=n_title)
|
||||
|
||||
if not url.startswith('null://'):
|
||||
apobj.notify(
|
||||
title=n_title,
|
||||
body=n_body,
|
||||
# `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise
|
||||
# &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between)
|
||||
body_format=apprise_input_format,
|
||||
# False is not an option for AppRise, must be type None
|
||||
attach=n_object.get('screenshot', None)
|
||||
)
|
||||
|
||||
# Returns empty string if nothing found, multi-line string otherwise
|
||||
log_value = logs.getvalue()
|
||||
@@ -344,6 +471,8 @@ def create_notification_parameters(n_object: NotificationContextData, datastore)
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
|
||||
|
||||
ext_base_url = datastore.data['settings']['application'].get('active_base_url').strip('/')+'/'
|
||||
|
||||
watch = datastore.data['watching'].get(n_object['uuid'])
|
||||
if watch:
|
||||
watch_title = datastore.data['watching'][n_object['uuid']].label
|
||||
@@ -357,20 +486,29 @@ def create_notification_parameters(n_object: NotificationContextData, datastore)
|
||||
watch_title = 'Change Detection'
|
||||
watch_tag = ''
|
||||
|
||||
# Create URLs to customise the notification with
|
||||
# active_base_url - set in store.py data property
|
||||
base_url = datastore.data['settings']['application'].get('active_base_url')
|
||||
|
||||
watch_url = n_object['watch_url']
|
||||
|
||||
diff_url = "{}/diff/{}".format(base_url, n_object['uuid'])
|
||||
preview_url = "{}/preview/{}".format(base_url, n_object['uuid'])
|
||||
# Build URLs manually instead of using url_for() to avoid requiring a request context
|
||||
# This allows notifications to be processed in background threads
|
||||
uuid = n_object['uuid']
|
||||
|
||||
if n_object.get('timestamp_from') and n_object.get('timestamp_to'):
|
||||
# Include a link to the diff page with specific versions
|
||||
diff_url = f"{ext_base_url}diff/{uuid}?from_version={n_object['timestamp_from']}&to_version={n_object['timestamp_to']}"
|
||||
else:
|
||||
diff_url = f"{ext_base_url}diff/{uuid}"
|
||||
|
||||
preview_url = f"{ext_base_url}preview/{uuid}"
|
||||
edit_url = f"{ext_base_url}edit/{uuid}"
|
||||
|
||||
# @todo test that preview_url is correct when running in not-null mode?
|
||||
# if not, first time app loads i think it can set a flask context
|
||||
n_object.update(
|
||||
{
|
||||
'base_url': base_url,
|
||||
'base_url': ext_base_url,
|
||||
'diff_url': diff_url,
|
||||
'preview_url': preview_url,
|
||||
'preview_url': preview_url, #@todo include 'version='
|
||||
'edit_url': edit_url, #@todo also pause, also mute link
|
||||
'watch_tag': watch_tag if watch_tag is not None else '',
|
||||
'watch_title': watch_title if watch_title is not None else '',
|
||||
'watch_url': watch_url,
|
||||
|
||||
@@ -5,33 +5,83 @@ Notification Service Module
|
||||
Extracted from update_worker.py to provide standalone notification functionality
|
||||
for both sync and async workers
|
||||
"""
|
||||
import datetime
|
||||
|
||||
import pytz
|
||||
from loguru import logger
|
||||
import time
|
||||
|
||||
from changedetectionio.notification import default_notification_format
|
||||
from changedetectionio.notification import default_notification_format, valid_notification_formats
|
||||
|
||||
|
||||
def _check_cascading_vars(datastore, var_name, watch):
|
||||
"""
|
||||
Check notification variables in cascading priority:
|
||||
Individual watch settings > Tag settings > Global settings
|
||||
"""
|
||||
from changedetectionio.notification import (
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
|
||||
default_notification_body,
|
||||
default_notification_title
|
||||
)
|
||||
|
||||
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
|
||||
v = watch.get(var_name)
|
||||
if v and not watch.get('notification_muted'):
|
||||
if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
|
||||
return datastore.data['settings']['application'].get('notification_format')
|
||||
|
||||
return v
|
||||
|
||||
tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
||||
if tags:
|
||||
for tag_uuid, tag in tags.items():
|
||||
v = tag.get(var_name)
|
||||
if v and not tag.get('notification_muted'):
|
||||
return v
|
||||
|
||||
if datastore.data['settings']['application'].get(var_name):
|
||||
return datastore.data['settings']['application'].get(var_name)
|
||||
|
||||
# Otherwise could be defaults
|
||||
if var_name == 'notification_format':
|
||||
return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
if var_name == 'notification_body':
|
||||
return default_notification_body
|
||||
if var_name == 'notification_title':
|
||||
return default_notification_title
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||
class NotificationContextData(dict):
|
||||
def __init__(self, initial_data=None, **kwargs):
|
||||
super().__init__({
|
||||
'base_url': None,
|
||||
'current_snapshot': None,
|
||||
'diff': None,
|
||||
'diff_clean': None,
|
||||
'diff_added': None,
|
||||
'diff_added_clean': None,
|
||||
'diff_full': None,
|
||||
'diff_full_clean': None,
|
||||
'diff_patch': None,
|
||||
'diff_removed': None,
|
||||
'diff_removed_clean': None,
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
'preview_url': None,
|
||||
'screenshot': None,
|
||||
'triggered_text': None,
|
||||
'timestamp_from': None,
|
||||
'timestamp_to': None,
|
||||
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||
'base_url': None,
|
||||
'diff_url': None,
|
||||
'preview_url': None,
|
||||
'watch_mime_type': None,
|
||||
'watch_tag': None,
|
||||
'watch_title': None,
|
||||
'markup_text_to_html': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||
})
|
||||
|
||||
# Apply any initial data passed in
|
||||
@@ -43,15 +93,126 @@ class NotificationContextData(dict):
|
||||
if kwargs:
|
||||
self.update(kwargs)
|
||||
|
||||
n_format = self.get('notification_format')
|
||||
if n_format and not valid_notification_formats.get(n_format):
|
||||
raise ValueError(f'Invalid notification format: "{n_format}"')
|
||||
|
||||
def set_random_for_validation(self):
|
||||
import random, string
|
||||
"""Randomly fills all dict keys with random strings (for validation/testing)."""
|
||||
"""Randomly fills all dict keys with random strings (for validation/testing).
|
||||
So we can test the output in the notification body
|
||||
"""
|
||||
for key in self.keys():
|
||||
if key in ['uuid', 'time', 'watch_uuid']:
|
||||
continue
|
||||
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
||||
self[key] = rand_str
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'):
|
||||
if not valid_notification_formats.get(value):
|
||||
raise ValueError(f'Invalid notification format: "{value}"')
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def timestamp_to_localtime(timestamp):
|
||||
# Format the date using locale-aware formatting with timezone
|
||||
dt = datetime.datetime.fromtimestamp(int(timestamp))
|
||||
dt = dt.replace(tzinfo=pytz.UTC)
|
||||
|
||||
# Get local timezone-aware datetime
|
||||
local_tz = datetime.datetime.now().astimezone().tzinfo
|
||||
local_dt = dt.astimezone(local_tz)
|
||||
|
||||
# Format date with timezone - using strftime for locale awareness
|
||||
try:
|
||||
formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
|
||||
except:
|
||||
# Fallback if locale issues
|
||||
formatted_date = local_dt.isoformat()
|
||||
|
||||
return formatted_date
|
||||
|
||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
|
||||
"""
|
||||
Efficiently renders only the diff placeholders that are actually used in the notification text.
|
||||
|
||||
Scans the notification template for diff placeholder usage (diff, diff_added, diff_clean, etc.)
|
||||
and only renders those specific variants, avoiding expensive render_diff() calls for unused placeholders.
|
||||
Uses LRU caching to avoid duplicate renders when multiple placeholders share the same arguments.
|
||||
|
||||
Args:
|
||||
notification_scan_text: The notification template text to scan for placeholders
|
||||
prev_snapshot: Previous version of content for diff comparison
|
||||
current_snapshot: Current version of content for diff comparison
|
||||
word_diff: Whether to use word-level (True) or line-level (False) diffing
|
||||
|
||||
Returns:
|
||||
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
||||
"""
|
||||
from changedetectionio import diff
|
||||
import re
|
||||
from functools import lru_cache
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Define specifications for each diff variant
|
||||
diff_specs = {
|
||||
'diff': {'word_diff': word_diff},
|
||||
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
|
||||
'diff_added': {'word_diff': word_diff, 'include_removed': False},
|
||||
'diff_added_clean': {'word_diff': word_diff, 'include_removed': False, 'include_change_type_prefix': False},
|
||||
'diff_full': {'word_diff': word_diff, 'include_equal': True},
|
||||
'diff_full_clean': {'word_diff': word_diff, 'include_equal': True, 'include_change_type_prefix': False},
|
||||
'diff_patch': {'word_diff': word_diff, 'patch_format': True},
|
||||
'diff_removed': {'word_diff': word_diff, 'include_added': False},
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
# Memoize render_diff to avoid duplicate renders with same kwargs
|
||||
@lru_cache(maxsize=4)
|
||||
def cached_render(kwargs_tuple):
|
||||
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only check and render diff keys that exist in NotificationContextData
|
||||
for key in NotificationContextData().keys():
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
kwargs = diff_specs[key]
|
||||
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
|
||||
ret[key] = cached_render(tuple(sorted(kwargs.items())))
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
|
||||
|
||||
return ret
|
||||
|
||||
def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggered_text, timestamp_changed=None):
|
||||
|
||||
n_object = {
|
||||
'current_snapshot': current_snapshot,
|
||||
'prev_snapshot': prev_snapshot,
|
||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
|
||||
'triggered_text': triggered_text,
|
||||
'uuid': watch.get('uuid') if watch else None,
|
||||
'watch_url': watch.get('url') if watch else None,
|
||||
'watch_uuid': watch.get('uuid') if watch else None,
|
||||
'watch_mime_type': watch.get('content-type')
|
||||
}
|
||||
|
||||
# The \n's in the content from the above will get converted to <br> etc depending on the notification format
|
||||
|
||||
if watch:
|
||||
n_object.update(watch.extra_notification_token_values())
|
||||
|
||||
return n_object
|
||||
|
||||
class NotificationService:
|
||||
"""
|
||||
Standalone notification service that handles all notification functionality
|
||||
@@ -62,12 +223,11 @@ class NotificationService:
|
||||
self.datastore = datastore
|
||||
self.notification_q = notification_q
|
||||
|
||||
def queue_notification_for_watch(self, n_object: NotificationContextData, watch):
|
||||
def queue_notification_for_watch(self, n_object: NotificationContextData, watch, date_index_from=-2, date_index_to=-1):
|
||||
"""
|
||||
Queue a notification for a watch with full diff rendering and template variables
|
||||
"""
|
||||
from changedetectionio import diff
|
||||
from changedetectionio.notification import default_notification_format_for_watch
|
||||
from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
|
||||
@@ -75,8 +235,6 @@ class NotificationService:
|
||||
dates = []
|
||||
trigger_text = ''
|
||||
|
||||
now = time.time()
|
||||
|
||||
if watch:
|
||||
watch_history = watch.history
|
||||
dates = list(watch_history.keys())
|
||||
@@ -84,100 +242,43 @@ class NotificationService:
|
||||
|
||||
# Add text that was triggered
|
||||
if len(dates):
|
||||
snapshot_contents = watch.get_history_snapshot(dates[-1])
|
||||
snapshot_contents = watch.get_history_snapshot(timestamp=dates[-1])
|
||||
else:
|
||||
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
|
||||
|
||||
# If we ended up here with "System default"
|
||||
if n_object.get('notification_format') == default_notification_format_for_watch:
|
||||
if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
|
||||
n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')
|
||||
|
||||
# HTML needs linebreak, but MarkDown and Text can use a linefeed
|
||||
if n_object.get('notification_format') == 'HTML':
|
||||
line_feed_sep = "<br>"
|
||||
# Snapshot will be plaintext on the disk, convert to some kind of HTML
|
||||
snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
|
||||
elif n_object.get('notification_format') == 'HTML Color':
|
||||
line_feed_sep = "<br>"
|
||||
# Snapshot will be plaintext on the disk, convert to some kind of HTML
|
||||
snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
|
||||
else:
|
||||
line_feed_sep = "\n"
|
||||
|
||||
triggered_text = ''
|
||||
if len(trigger_text):
|
||||
from . import html_tools
|
||||
triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
|
||||
if triggered_text:
|
||||
triggered_text = line_feed_sep.join(triggered_text)
|
||||
triggered_text = '\n'.join(triggered_text)
|
||||
|
||||
# Could be called as a 'test notification' with only 1 snapshot available
|
||||
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
|
||||
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
|
||||
|
||||
if len(dates) > 1:
|
||||
prev_snapshot = watch.get_history_snapshot(dates[-2])
|
||||
current_snapshot = watch.get_history_snapshot(dates[-1])
|
||||
prev_snapshot = watch.get_history_snapshot(timestamp=dates[date_index_from])
|
||||
current_snapshot = watch.get_history_snapshot(timestamp=dates[date_index_to])
|
||||
|
||||
n_object.update({
|
||||
'current_snapshot': snapshot_contents,
|
||||
'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep),
|
||||
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
|
||||
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep),
|
||||
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
|
||||
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
|
||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||
'triggered_text': triggered_text,
|
||||
'uuid': watch.get('uuid') if watch else None,
|
||||
'watch_url': watch.get('url') if watch else None,
|
||||
'watch_uuid': watch.get('uuid') if watch else None,
|
||||
})
|
||||
|
||||
if watch:
|
||||
n_object.update(watch.extra_notification_token_values())
|
||||
n_object.update(set_basic_notification_vars(current_snapshot=current_snapshot,
|
||||
prev_snapshot=prev_snapshot,
|
||||
watch=watch,
|
||||
triggered_text=triggered_text,
|
||||
timestamp_changed=dates[date_index_to]))
|
||||
|
||||
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
|
||||
logger.debug("Queued notification for sending")
|
||||
self.notification_q.put(n_object)
|
||||
|
||||
def _check_cascading_vars(self, var_name, watch):
|
||||
"""
|
||||
Check notification variables in cascading priority:
|
||||
Individual watch settings > Tag settings > Global settings
|
||||
"""
|
||||
from changedetectionio.notification import (
|
||||
default_notification_format_for_watch,
|
||||
default_notification_body,
|
||||
default_notification_title
|
||||
)
|
||||
|
||||
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
|
||||
v = watch.get(var_name)
|
||||
if v and not watch.get('notification_muted'):
|
||||
if var_name == 'notification_format' and v == default_notification_format_for_watch:
|
||||
return self.datastore.data['settings']['application'].get('notification_format')
|
||||
|
||||
return v
|
||||
|
||||
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
||||
if tags:
|
||||
for tag_uuid, tag in tags.items():
|
||||
v = tag.get(var_name)
|
||||
if v and not tag.get('notification_muted'):
|
||||
return v
|
||||
|
||||
if self.datastore.data['settings']['application'].get(var_name):
|
||||
return self.datastore.data['settings']['application'].get(var_name)
|
||||
|
||||
# Otherwise could be defaults
|
||||
if var_name == 'notification_format':
|
||||
return default_notification_format_for_watch
|
||||
if var_name == 'notification_body':
|
||||
return default_notification_body
|
||||
if var_name == 'notification_title':
|
||||
return default_notification_title
|
||||
|
||||
return None
|
||||
if self.notification_q:
|
||||
logger.debug("Queued notification for sending")
|
||||
self.notification_q.put(n_object)
|
||||
else:
|
||||
logger.debug("Not queued, no queue defined. Just returning processed data")
|
||||
return n_object
|
||||
|
||||
def send_content_changed_notification(self, watch_uuid):
|
||||
"""
|
||||
@@ -200,10 +301,11 @@ class NotificationService:
|
||||
# Should be a better parent getter in the model object
|
||||
|
||||
# Prefer - Individual watch settings > Tag settings > Global settings (in that order)
|
||||
n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch)
|
||||
n_object['notification_title'] = self._check_cascading_vars('notification_title', watch)
|
||||
n_object['notification_body'] = self._check_cascading_vars('notification_body', watch)
|
||||
n_object['notification_format'] = self._check_cascading_vars('notification_format', watch)
|
||||
# this change probably not needed?
|
||||
n_object['notification_urls'] = _check_cascading_vars(self.datastore, 'notification_urls', watch)
|
||||
n_object['notification_title'] = _check_cascading_vars(self.datastore,'notification_title', watch)
|
||||
n_object['notification_body'] = _check_cascading_vars(self.datastore,'notification_body', watch)
|
||||
n_object['notification_format'] = _check_cascading_vars(self.datastore,'notification_format', watch)
|
||||
|
||||
# (Individual watch) Only prepare to notify if the rules above matched
|
||||
queued = False
|
||||
@@ -226,9 +328,8 @@ class NotificationService:
|
||||
if not watch:
|
||||
return
|
||||
|
||||
n_format = self.datastore.data['settings']['application'].get('notification_format', default_notification_format)
|
||||
filter_list = ", ".join(watch['include_filters'])
|
||||
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_to_html' is not needed
|
||||
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
|
||||
body = f"""Hello,
|
||||
|
||||
Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts.
|
||||
@@ -243,9 +344,9 @@ Thanks - Your omniscient changedetection.io installation.
|
||||
n_object = NotificationContextData({
|
||||
'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
|
||||
'notification_body': body,
|
||||
'notification_format': n_format,
|
||||
'markup_text_to_html': n_format.lower().startswith('html')
|
||||
'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
|
||||
})
|
||||
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
|
||||
|
||||
if len(watch['notification_urls']):
|
||||
n_object['notification_urls'] = watch['notification_urls']
|
||||
@@ -273,9 +374,9 @@ Thanks - Your omniscient changedetection.io installation.
|
||||
if not watch:
|
||||
return
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
|
||||
n_format = self.datastore.data['settings']['application'].get('notification_format', default_notification_format).lower()
|
||||
|
||||
step = step_n + 1
|
||||
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_to_html' is not needed
|
||||
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
|
||||
|
||||
# {{{{ }}}} because this will be Jinja2 {{ }} tokens
|
||||
body = f"""Hello,
|
||||
@@ -292,9 +393,9 @@ Thanks - Your omniscient changedetection.io installation.
|
||||
n_object = NotificationContextData({
|
||||
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
|
||||
'notification_body': body,
|
||||
'notification_format': n_format,
|
||||
'markup_text_to_html': n_format.lower().startswith('html')
|
||||
'notification_format': self._check_cascading_vars('notification_format', watch),
|
||||
})
|
||||
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
|
||||
|
||||
if len(watch['notification_urls']):
|
||||
n_object['notification_urls'] = watch['notification_urls']
|
||||
|
||||
@@ -2,6 +2,7 @@ import pluggy
|
||||
import os
|
||||
import importlib
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
# Global plugin namespace for changedetection.io
|
||||
PLUGIN_NAMESPACE = "changedetectionio"
|
||||
@@ -16,15 +17,94 @@ class ChangeDetectionSpec:
|
||||
@hookspec
|
||||
def ui_edit_stats_extras(watch):
|
||||
"""Return HTML content to add to the stats tab in the edit view.
|
||||
|
||||
|
||||
Args:
|
||||
watch: The watch object being edited
|
||||
|
||||
|
||||
Returns:
|
||||
str: HTML content to be inserted in the stats tab
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def register_content_fetcher(self):
|
||||
"""Return a tuple of (fetcher_name, fetcher_class) for content fetcher plugins.
|
||||
|
||||
The fetcher_name should start with 'html_' and the fetcher_class
|
||||
should inherit from changedetectionio.content_fetchers.base.Fetcher
|
||||
|
||||
Returns:
|
||||
tuple: (str: fetcher_name, class: fetcher_class)
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def fetcher_status_icon(fetcher_name):
|
||||
"""Return status icon HTML attributes for a content fetcher.
|
||||
|
||||
Args:
|
||||
fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')
|
||||
|
||||
Returns:
|
||||
str: HTML string containing <img> tags or other status icon elements
|
||||
Empty string if no custom status icon is needed
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def plugin_static_path(self):
|
||||
"""Return the path to the plugin's static files directory.
|
||||
|
||||
Returns:
|
||||
str: Absolute path to the plugin's static directory, or None if no static files
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def get_itemprop_availability_override(self, content, fetcher_name, fetcher_instance, url):
|
||||
"""Provide custom implementation of get_itemprop_availability for a specific fetcher.
|
||||
|
||||
This hook allows plugins to provide their own product availability detection
|
||||
when their fetcher is being used. This is called as a fallback when the built-in
|
||||
method doesn't find good data.
|
||||
|
||||
Args:
|
||||
content: The HTML/text content to parse
|
||||
fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
|
||||
fetcher_instance: The fetcher instance that generated the content
|
||||
url: The URL being watched/checked
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with availability data:
|
||||
{
|
||||
'price': float or None,
|
||||
'availability': str or None, # e.g., 'in stock', 'out of stock'
|
||||
'currency': str or None, # e.g., 'USD', 'EUR'
|
||||
}
|
||||
Or None if this plugin doesn't handle this fetcher or couldn't extract data
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def plugin_settings_tab(self):
|
||||
"""Return settings tab information for this plugin.
|
||||
|
||||
This hook allows plugins to add their own settings tab to the settings page.
|
||||
Settings will be saved to a separate JSON file in the datastore directory.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with settings tab information:
|
||||
{
|
||||
'plugin_id': str, # Unique identifier (e.g., 'zyte_fetcher')
|
||||
'tab_label': str, # Display name for tab (e.g., 'Zyte Fetcher')
|
||||
'form_class': Form, # WTForms Form class for the settings
|
||||
'template_path': str, # Optional: path to Jinja2 template (relative to plugin)
|
||||
# If not provided, a default form renderer will be used
|
||||
}
|
||||
Or None if this plugin doesn't provide settings
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
@@ -65,18 +145,334 @@ load_plugins_from_directories()
|
||||
# Discover installed plugins from external packages (if any)
|
||||
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|
||||
|
||||
# Function to inject datastore into plugins that need it
|
||||
def inject_datastore_into_plugins(datastore):
|
||||
"""Inject the global datastore into plugins that need access to settings.
|
||||
|
||||
This should be called after plugins are loaded and datastore is initialized.
|
||||
|
||||
Args:
|
||||
datastore: The global ChangeDetectionStore instance
|
||||
"""
|
||||
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
|
||||
# Check if plugin has datastore attribute and it's not set
|
||||
if hasattr(plugin_obj, 'datastore'):
|
||||
if plugin_obj.datastore is None:
|
||||
plugin_obj.datastore = datastore
|
||||
logger.debug(f"Injected datastore into plugin: {plugin_name}")
|
||||
|
||||
# Function to register built-in fetchers - called later from content_fetchers/__init__.py
|
||||
def register_builtin_fetchers():
|
||||
"""Register built-in content fetchers as internal plugins
|
||||
|
||||
This is called from content_fetchers/__init__.py after all fetchers are imported
|
||||
to avoid circular import issues.
|
||||
"""
|
||||
from changedetectionio.content_fetchers import requests, playwright, puppeteer, webdriver_selenium
|
||||
|
||||
# Register each built-in fetcher plugin
|
||||
if hasattr(requests, 'requests_plugin'):
|
||||
plugin_manager.register(requests.requests_plugin, 'builtin_requests')
|
||||
|
||||
if hasattr(playwright, 'playwright_plugin'):
|
||||
plugin_manager.register(playwright.playwright_plugin, 'builtin_playwright')
|
||||
|
||||
if hasattr(puppeteer, 'puppeteer_plugin'):
|
||||
plugin_manager.register(puppeteer.puppeteer_plugin, 'builtin_puppeteer')
|
||||
|
||||
if hasattr(webdriver_selenium, 'webdriver_selenium_plugin'):
|
||||
plugin_manager.register(webdriver_selenium.webdriver_selenium_plugin, 'builtin_webdriver_selenium')
|
||||
|
||||
# Helper function to collect UI stats extras from all plugins
|
||||
def collect_ui_edit_stats_extras(watch):
|
||||
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
|
||||
extras_content = []
|
||||
|
||||
|
||||
# Get all plugins that implement the ui_edit_stats_extras hook
|
||||
results = plugin_manager.hook.ui_edit_stats_extras(watch=watch)
|
||||
|
||||
|
||||
# If we have results, add them to our content
|
||||
if results:
|
||||
for result in results:
|
||||
if result: # Skip empty results
|
||||
extras_content.append(result)
|
||||
|
||||
return "\n".join(extras_content) if extras_content else ""
|
||||
|
||||
return "\n".join(extras_content) if extras_content else ""
|
||||
|
||||
def collect_fetcher_status_icons(fetcher_name):
|
||||
"""Collect status icon data from all plugins
|
||||
|
||||
Args:
|
||||
fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')
|
||||
|
||||
Returns:
|
||||
dict or None: Icon data dictionary from first matching plugin, or None
|
||||
"""
|
||||
# Get status icon data from plugins
|
||||
results = plugin_manager.hook.fetcher_status_icon(fetcher_name=fetcher_name)
|
||||
|
||||
# Return first non-None result
|
||||
if results:
|
||||
for result in results:
|
||||
if result and isinstance(result, dict):
|
||||
return result
|
||||
|
||||
return None
|
||||
|
||||
def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instance, url):
|
||||
"""Get itemprop availability data from plugins as a fallback.
|
||||
|
||||
This is called when the built-in get_itemprop_availability doesn't find good data.
|
||||
|
||||
Args:
|
||||
content: The HTML/text content to parse
|
||||
fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
|
||||
fetcher_instance: The fetcher instance that generated the content
|
||||
url: The URL being watched (watch.link - includes Jinja2 evaluation)
|
||||
|
||||
Returns:
|
||||
dict or None: Availability data dictionary from first matching plugin, or None
|
||||
"""
|
||||
# Get availability data from plugins
|
||||
results = plugin_manager.hook.get_itemprop_availability_override(
|
||||
content=content,
|
||||
fetcher_name=fetcher_name,
|
||||
fetcher_instance=fetcher_instance,
|
||||
url=url
|
||||
)
|
||||
|
||||
# Return first non-None result with actual data
|
||||
if results:
|
||||
for result in results:
|
||||
if result and isinstance(result, dict):
|
||||
# Check if the result has any meaningful data
|
||||
if result.get('price') is not None or result.get('availability'):
|
||||
return result
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_active_plugins():
|
||||
"""Get a list of active plugins with their descriptions.
|
||||
|
||||
Returns:
|
||||
list: List of dictionaries with plugin information:
|
||||
[
|
||||
{'name': 'plugin_name', 'description': 'Plugin description'},
|
||||
...
|
||||
]
|
||||
"""
|
||||
active_plugins = []
|
||||
|
||||
# Get all registered plugins
|
||||
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
|
||||
# Skip built-in plugins (they start with 'builtin_')
|
||||
if plugin_name.startswith('builtin_'):
|
||||
continue
|
||||
|
||||
# Get plugin description if available
|
||||
description = None
|
||||
if hasattr(plugin_obj, '__doc__') and plugin_obj.__doc__:
|
||||
description = plugin_obj.__doc__.strip().split('\n')[0] # First line only
|
||||
elif hasattr(plugin_obj, 'description'):
|
||||
description = plugin_obj.description
|
||||
|
||||
# Try to get a friendly name from the plugin
|
||||
friendly_name = plugin_name
|
||||
if hasattr(plugin_obj, 'name'):
|
||||
friendly_name = plugin_obj.name
|
||||
|
||||
active_plugins.append({
|
||||
'name': friendly_name,
|
||||
'description': description or 'No description available'
|
||||
})
|
||||
|
||||
return active_plugins
|
||||
|
||||
|
||||
def get_fetcher_capabilities(watch, datastore):
|
||||
"""Get capability flags for a watch's fetcher.
|
||||
|
||||
Args:
|
||||
watch: The watch object/dict
|
||||
datastore: The datastore to resolve 'system' fetcher
|
||||
|
||||
Returns:
|
||||
dict: Dictionary with capability flags:
|
||||
{
|
||||
'supports_browser_steps': bool,
|
||||
'supports_screenshots': bool,
|
||||
'supports_xpath_element_data': bool
|
||||
}
|
||||
"""
|
||||
# Get the fetcher name from watch
|
||||
fetcher_name = watch.get('fetch_backend', 'system')
|
||||
|
||||
# Resolve 'system' to actual fetcher
|
||||
if fetcher_name == 'system':
|
||||
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
|
||||
|
||||
# Get the fetcher class
|
||||
from changedetectionio import content_fetchers
|
||||
|
||||
# Try to get from built-in fetchers first
|
||||
if hasattr(content_fetchers, fetcher_name):
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name)
|
||||
return {
|
||||
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
|
||||
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
|
||||
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
|
||||
}
|
||||
|
||||
# Try to get from plugin-provided fetchers
|
||||
# Query all plugins for registered fetchers
|
||||
plugin_fetchers = plugin_manager.hook.register_content_fetcher()
|
||||
for fetcher_registration in plugin_fetchers:
|
||||
if fetcher_registration:
|
||||
name, fetcher_class = fetcher_registration
|
||||
if name == fetcher_name:
|
||||
return {
|
||||
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
|
||||
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
|
||||
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
|
||||
}
|
||||
|
||||
# Default: no capabilities
|
||||
return {
|
||||
'supports_browser_steps': False,
|
||||
'supports_screenshots': False,
|
||||
'supports_xpath_element_data': False
|
||||
}
|
||||
|
||||
|
||||
def get_plugin_settings_tabs():
|
||||
"""Get all plugin settings tabs.
|
||||
|
||||
Returns:
|
||||
list: List of dictionaries with plugin settings tab information:
|
||||
[
|
||||
{
|
||||
'plugin_id': str,
|
||||
'tab_label': str,
|
||||
'form_class': Form,
|
||||
'description': str
|
||||
},
|
||||
...
|
||||
]
|
||||
"""
|
||||
tabs = []
|
||||
results = plugin_manager.hook.plugin_settings_tab()
|
||||
|
||||
for result in results:
|
||||
if result and isinstance(result, dict):
|
||||
# Validate required fields
|
||||
if 'plugin_id' in result and 'tab_label' in result and 'form_class' in result:
|
||||
tabs.append(result)
|
||||
else:
|
||||
logger.warning(f"Invalid plugin settings tab spec: {result}")
|
||||
|
||||
return tabs
|
||||
|
||||
|
||||
def load_plugin_settings(datastore_path, plugin_id):
|
||||
"""Load settings for a specific plugin from JSON file.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
plugin_id: Unique identifier for the plugin (e.g., 'zyte_fetcher')
|
||||
|
||||
Returns:
|
||||
dict: Plugin settings, or empty dict if file doesn't exist
|
||||
"""
|
||||
import json
|
||||
settings_file = os.path.join(datastore_path, f"{plugin_id}.json")
|
||||
|
||||
if not os.path.exists(settings_file):
|
||||
return {}
|
||||
|
||||
try:
|
||||
with open(settings_file, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load settings for plugin '{plugin_id}': {e}")
|
||||
return {}
|
||||
|
||||
|
||||
def save_plugin_settings(datastore_path, plugin_id, settings):
|
||||
"""Save settings for a specific plugin to JSON file.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
plugin_id: Unique identifier for the plugin (e.g., 'zyte_fetcher')
|
||||
settings: Dictionary of settings to save
|
||||
|
||||
Returns:
|
||||
bool: True if save was successful, False otherwise
|
||||
"""
|
||||
import json
|
||||
settings_file = os.path.join(datastore_path, f"{plugin_id}.json")
|
||||
|
||||
try:
|
||||
with open(settings_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(settings, f, indent=2, ensure_ascii=False)
|
||||
logger.info(f"Saved settings for plugin '{plugin_id}' to {settings_file}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save settings for plugin '{plugin_id}': {e}")
|
||||
return False
|
||||
|
||||
|
||||
def get_plugin_template_paths():
|
||||
"""Get list of plugin template directories for Jinja2 loader.
|
||||
|
||||
Scans both external pluggy plugins and built-in processor plugins.
|
||||
|
||||
Returns:
|
||||
list: List of absolute paths to plugin template directories
|
||||
"""
|
||||
template_paths = []
|
||||
|
||||
# Add the base processors/templates directory (as absolute path)
|
||||
processors_templates_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'processors', 'templates')
|
||||
if os.path.isdir(processors_templates_dir):
|
||||
template_paths.append(processors_templates_dir)
|
||||
logger.debug(f"Added base processors template path: {processors_templates_dir}")
|
||||
|
||||
# Scan built-in processor plugins
|
||||
from changedetectionio.processors import find_processors
|
||||
processor_list = find_processors()
|
||||
for processor_module, processor_name in processor_list:
|
||||
# Each processor is a module, check if it has a templates directory
|
||||
if hasattr(processor_module, '__file__'):
|
||||
processor_file = processor_module.__file__
|
||||
if processor_file:
|
||||
# Get the processor directory (e.g., processors/image_ssim_diff/)
|
||||
processor_dir = os.path.dirname(os.path.abspath(processor_file))
|
||||
templates_dir = os.path.join(processor_dir, 'templates')
|
||||
if os.path.isdir(templates_dir):
|
||||
template_paths.append(templates_dir)
|
||||
logger.debug(f"Added processor template path: {templates_dir}")
|
||||
|
||||
# Get all registered external pluggy plugins
|
||||
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
|
||||
# Check if plugin has a templates directory
|
||||
if hasattr(plugin_obj, '__file__'):
|
||||
plugin_file = plugin_obj.__file__
|
||||
elif hasattr(plugin_obj, '__module__'):
|
||||
# Get the module file
|
||||
module = sys.modules.get(plugin_obj.__module__)
|
||||
if module and hasattr(module, '__file__'):
|
||||
plugin_file = module.__file__
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
|
||||
if plugin_file:
|
||||
plugin_dir = os.path.dirname(os.path.abspath(plugin_file))
|
||||
templates_dir = os.path.join(plugin_dir, 'templates')
|
||||
if os.path.isdir(templates_dir):
|
||||
template_paths.append(templates_dir)
|
||||
logger.debug(f"Added plugin template path: {templates_dir}")
|
||||
|
||||
return template_paths
|
||||
@@ -1,178 +1,10 @@
|
||||
from abc import abstractmethod
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from copy import deepcopy
|
||||
from functools import lru_cache
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
from flask_babel import gettext
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
import pkgutil
|
||||
import re
|
||||
|
||||
class difference_detection_processor():
|
||||
|
||||
browser_steps = None
|
||||
datastore = None
|
||||
fetcher = None
|
||||
screenshot = None
|
||||
watch = None
|
||||
xpath_data = None
|
||||
preferred_proxy = None
|
||||
|
||||
def __init__(self, *args, datastore, watch_uuid, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.datastore = datastore
|
||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||
# Generic fetcher that should be extended (requests, playwright etc)
|
||||
self.fetcher = Fetcher()
|
||||
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
url = self.watch.link
|
||||
|
||||
# Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
|
||||
if re.search(r'^file:', url.strip(), re.IGNORECASE):
|
||||
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
||||
raise Exception(
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
||||
|
||||
# Proxy ID "key"
|
||||
preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
|
||||
|
||||
# Pluggable content self.fetcher
|
||||
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
|
||||
prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')
|
||||
|
||||
# In the case that the preferred fetcher was a browser config with custom connection URL..
|
||||
# @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
|
||||
custom_browser_connection_url = None
|
||||
if prefer_fetch_backend.startswith('extra_browser_'):
|
||||
(t, key) = prefer_fetch_backend.split('extra_browser_')
|
||||
connection = list(
|
||||
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
|
||||
if connection:
|
||||
prefer_fetch_backend = 'html_webdriver'
|
||||
custom_browser_connection_url = connection[0].get('browser_connection_url')
|
||||
|
||||
# PDF should be html_requests because playwright will serve it up (so far) in a embedded page
|
||||
# @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
|
||||
# @todo needs test to or a fix
|
||||
if self.watch.is_pdf:
|
||||
prefer_fetch_backend = "html_requests"
|
||||
|
||||
# Grab the right kind of 'fetcher', (playwright, requests, etc)
|
||||
from changedetectionio import content_fetchers
|
||||
if hasattr(content_fetchers, prefer_fetch_backend):
|
||||
# @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
|
||||
if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
|
||||
# This is never supported in selenium anyway
|
||||
logger.warning("Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
|
||||
from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
|
||||
fetcher_obj = playwright_fetcher
|
||||
else:
|
||||
fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
|
||||
else:
|
||||
# What it referenced doesnt exist, Just use a default
|
||||
fetcher_obj = getattr(content_fetchers, "html_requests")
|
||||
|
||||
proxy_url = None
|
||||
if preferred_proxy_id:
|
||||
# Custom browser endpoints should NOT have a proxy added
|
||||
if not prefer_fetch_backend.startswith('extra_browser_'):
|
||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
||||
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
||||
else:
|
||||
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
|
||||
|
||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
||||
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
||||
custom_browser_connection_url=custom_browser_connection_url
|
||||
)
|
||||
|
||||
if self.watch.has_browser_steps:
|
||||
self.fetcher.browser_steps = self.watch.get('browser_steps', [])
|
||||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
request_headers = CaseInsensitiveDict()
|
||||
|
||||
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
||||
if ua and ua.get(prefer_fetch_backend):
|
||||
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
|
||||
|
||||
request_headers.update(self.watch.get('headers', {}))
|
||||
request_headers.update(self.datastore.get_all_base_headers())
|
||||
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
|
||||
|
||||
# https://github.com/psf/requests/issues/4525
|
||||
# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
|
||||
# do this by accident.
|
||||
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
|
||||
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
||||
|
||||
for header_name in request_headers:
|
||||
request_headers.update({header_name: jinja_render(template_str=request_headers.get(header_name))})
|
||||
|
||||
timeout = self.datastore.data['settings']['requests'].get('timeout')
|
||||
|
||||
request_body = self.watch.get('body')
|
||||
if request_body:
|
||||
request_body = jinja_render(template_str=self.watch.get('body'))
|
||||
|
||||
request_method = self.watch.get('method')
|
||||
ignore_status_codes = self.watch.get('ignore_status_codes', False)
|
||||
|
||||
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
||||
system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
|
||||
if self.watch.get('webdriver_delay'):
|
||||
self.fetcher.render_extract_delay = self.watch.get('webdriver_delay')
|
||||
elif system_webdriver_delay is not None:
|
||||
self.fetcher.render_extract_delay = system_webdriver_delay
|
||||
|
||||
if self.watch.get('webdriver_js_execute_code') is not None and self.watch.get('webdriver_js_execute_code').strip():
|
||||
self.fetcher.webdriver_js_execute_code = self.watch.get('webdriver_js_execute_code')
|
||||
|
||||
# Requests for PDF's, images etc should be passwd the is_binary flag
|
||||
is_binary = self.watch.is_pdf
|
||||
|
||||
# And here we go! call the right browser with browser-specific settings
|
||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||
# All fetchers are now async
|
||||
await self.fetcher.run(
|
||||
current_include_filters=self.watch.get('include_filters'),
|
||||
empty_pages_are_a_change=empty_pages_are_a_change,
|
||||
fetch_favicon=self.watch.favicon_is_expired(),
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
is_binary=is_binary,
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
timeout=timeout,
|
||||
url=url,
|
||||
)
|
||||
|
||||
#@todo .quit here could go on close object, so we can run JS if change-detected
|
||||
self.fetcher.quit(watch=self.watch)
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
@abstractmethod
|
||||
def run_changedetection(self, watch):
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
some_data = 'xxxxx'
|
||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||
changed_detected = False
|
||||
return changed_detected, update_obj, ''.encode('utf-8')
|
||||
|
||||
|
||||
def find_sub_packages(package_name):
|
||||
"""
|
||||
@@ -196,6 +28,7 @@ def find_processors():
|
||||
|
||||
processors = []
|
||||
sub_packages = find_sub_packages(package_name)
|
||||
from changedetectionio.processors.base import difference_detection_processor
|
||||
|
||||
for sub_package in sub_packages:
|
||||
module_name = f"{package_name}.{sub_package}.processor"
|
||||
@@ -204,8 +37,12 @@ def find_processors():
|
||||
|
||||
# Iterate through all classes in the module
|
||||
for name, obj in inspect.getmembers(module, inspect.isclass):
|
||||
if issubclass(obj, difference_detection_processor) and obj is not difference_detection_processor:
|
||||
# Only register classes that are actually defined in this module (not imported)
|
||||
if (issubclass(obj, difference_detection_processor) and
|
||||
obj is not difference_detection_processor and
|
||||
obj.__module__ == module.__name__):
|
||||
processors.append((module, sub_package))
|
||||
break # Only need one processor per module
|
||||
except (ModuleNotFoundError, ImportError) as e:
|
||||
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
||||
|
||||
@@ -240,17 +77,205 @@ def get_custom_watch_obj_for_processor(processor_name):
|
||||
return watch_class
|
||||
|
||||
|
||||
def find_processor_module(processor_name):
|
||||
"""
|
||||
Find the processor module by name.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'image_ssim_diff')
|
||||
|
||||
Returns:
|
||||
module: The processor's parent module, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if processor_tuple:
|
||||
# Return the parent module (the package containing processor.py)
|
||||
return get_parent_module(processor_tuple[0])
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def available_processors():
|
||||
"""
|
||||
Get a list of processors by name and description for the UI elements
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
|
||||
:return: A list :)
|
||||
"""
|
||||
|
||||
processor_classes = find_processors()
|
||||
|
||||
# Check if ALLOWED_PROCESSORS env var is set
|
||||
# For now we disable it, need to make a deploy with lots of new code and this will be an overload
|
||||
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
|
||||
allowed_processors = None
|
||||
if allowed_processors_env:
|
||||
# Parse comma-separated list and strip whitespace
|
||||
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
|
||||
|
||||
available = []
|
||||
for package, processor_class in processor_classes:
|
||||
available.append((processor_class, package.name))
|
||||
for module, sub_package_name in processor_classes:
|
||||
# Filter by allowed processors if set
|
||||
if allowed_processors and sub_package_name not in allowed_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
|
||||
continue
|
||||
|
||||
return available
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
else:
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
|
||||
available.append((sub_package_name, description, weight))
|
||||
|
||||
# Sort by weight (lower weight = appears first)
|
||||
available.sort(key=lambda x: x[2])
|
||||
|
||||
# Return as tuples without weight (for backwards compatibility)
|
||||
return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
|
||||
def get_processor_badge_texts():
|
||||
"""
|
||||
Get a dictionary mapping processor names to their list_badge_text values.
|
||||
Translations are applied based on the current request locale.
|
||||
|
||||
:return: A dict mapping processor name to badge text (e.g., {'text_json_diff': 'Text', 'restock_diff': 'Restock'})
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
badge_texts = {}
|
||||
|
||||
for module, sub_package_name in processor_classes:
|
||||
# Try to get the 'list_badge_text' attribute from the processor module
|
||||
if hasattr(module, 'list_badge_text'):
|
||||
badge_texts[sub_package_name] = gettext(module.list_badge_text)
|
||||
else:
|
||||
# Fall back to parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'list_badge_text'):
|
||||
badge_texts[sub_package_name] = gettext(parent_module.list_badge_text)
|
||||
|
||||
return badge_texts
|
||||
|
||||
|
||||
def get_processor_descriptions():
|
||||
"""
|
||||
Get a dictionary mapping processor names to their description/name values.
|
||||
Translations are applied based on the current request locale.
|
||||
|
||||
:return: A dict mapping processor name to description (e.g., {'text_json_diff': 'Webpage Text/HTML, JSON and PDF changes'})
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
descriptions = {}
|
||||
|
||||
for module, sub_package_name in processor_classes:
|
||||
# Try to get the 'name' or 'description' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
descriptions[sub_package_name] = gettext(module.name)
|
||||
elif hasattr(module, 'description'):
|
||||
descriptions[sub_package_name] = gettext(module.description)
|
||||
else:
|
||||
# Fall back to parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
descriptions[sub_package_name] = gettext(parent_module.processor_description)
|
||||
elif parent_module and hasattr(parent_module, 'name'):
|
||||
descriptions[sub_package_name] = gettext(parent_module.name)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
descriptions[sub_package_name] = sub_package_name.replace('_', ' ').title()
|
||||
|
||||
return descriptions
|
||||
|
||||
|
||||
def generate_processor_badge_colors(processor_name):
|
||||
"""
|
||||
Generate consistent colors for a processor badge based on its name.
|
||||
Uses a hash of the processor name to generate pleasing, accessible colors
|
||||
for both light and dark modes.
|
||||
|
||||
:param processor_name: The processor name (e.g., 'text_json_diff')
|
||||
:return: A dict with 'light' and 'dark' color schemes, each containing 'bg' and 'color'
|
||||
"""
|
||||
import hashlib
|
||||
|
||||
# Generate a consistent hash from the processor name
|
||||
hash_obj = hashlib.md5(processor_name.encode('utf-8'))
|
||||
hash_int = int(hash_obj.hexdigest()[:8], 16)
|
||||
|
||||
# Generate hue from hash (0-360)
|
||||
hue = hash_int % 360
|
||||
|
||||
# Light mode: pastel background with darker text
|
||||
light_saturation = 60 + (hash_int % 25) # 60-85%
|
||||
light_lightness = 85 + (hash_int % 10) # 85-95% - very light
|
||||
text_lightness = 25 + (hash_int % 15) # 25-40% - dark
|
||||
|
||||
# Dark mode: solid, vibrant colors with white text
|
||||
dark_saturation = 55 + (hash_int % 20) # 55-75%
|
||||
dark_lightness = 45 + (hash_int % 15) # 45-60%
|
||||
|
||||
return {
|
||||
'light': {
|
||||
'bg': f'hsl({hue}, {light_saturation}%, {light_lightness}%)',
|
||||
'color': f'hsl({hue}, 50%, {text_lightness}%)'
|
||||
},
|
||||
'dark': {
|
||||
'bg': f'hsl({hue}, {dark_saturation}%, {dark_lightness}%)',
|
||||
'color': '#fff'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_processor_badge_css():
|
||||
"""
|
||||
Generate CSS for all processor badges with auto-generated colors.
|
||||
This creates CSS rules for both light and dark modes for each processor.
|
||||
|
||||
:return: A string containing CSS rules for all processor badges
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
css_rules = []
|
||||
|
||||
for module, sub_package_name in processor_classes:
|
||||
colors = generate_processor_badge_colors(sub_package_name)
|
||||
|
||||
# Light mode rule
|
||||
css_rules.append(
|
||||
f".processor-badge-{sub_package_name} {{\n"
|
||||
f" background-color: {colors['light']['bg']};\n"
|
||||
f" color: {colors['light']['color']};\n"
|
||||
f"}}"
|
||||
)
|
||||
|
||||
# Dark mode rule
|
||||
css_rules.append(
|
||||
f"html[data-darkmode=\"true\"] .processor-badge-{sub_package_name} {{\n"
|
||||
f" background-color: {colors['dark']['bg']};\n"
|
||||
f" color: {colors['dark']['color']};\n"
|
||||
f"}}"
|
||||
)
|
||||
|
||||
return '\n\n'.join(css_rules)
|
||||
|
||||
|
||||
259
changedetectionio/processors/base.py
Normal file
259
changedetectionio/processors/base.py
Normal file
@@ -0,0 +1,259 @@
|
||||
import re
|
||||
import hashlib
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from copy import deepcopy
|
||||
from abc import abstractmethod
|
||||
import os
|
||||
from loguru import logger
|
||||
|
||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
||||
SCREENSHOT_FORMAT_PNG = 'PNG'
|
||||
|
||||
class difference_detection_processor():
|
||||
browser_steps = None
|
||||
datastore = None
|
||||
fetcher = None
|
||||
screenshot = None
|
||||
watch = None
|
||||
xpath_data = None
|
||||
preferred_proxy = None
|
||||
screenshot_format = SCREENSHOT_FORMAT_JPEG
|
||||
|
||||
def __init__(self, datastore, watch_uuid):
|
||||
self.datastore = datastore
|
||||
self.watch_uuid = watch_uuid
|
||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||
# Generic fetcher that should be extended (requests, playwright etc)
|
||||
self.fetcher = Fetcher()
|
||||
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
url = self.watch.link
|
||||
|
||||
# Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
|
||||
if re.search(r'^file:', url.strip(), re.IGNORECASE):
|
||||
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
||||
raise Exception(
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
||||
|
||||
# Proxy ID "key"
|
||||
preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(
|
||||
uuid=self.watch.get('uuid'))
|
||||
|
||||
# Pluggable content self.fetcher
|
||||
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
|
||||
prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')
|
||||
|
||||
# In the case that the preferred fetcher was a browser config with custom connection URL..
|
||||
# @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
|
||||
custom_browser_connection_url = None
|
||||
if prefer_fetch_backend.startswith('extra_browser_'):
|
||||
(t, key) = prefer_fetch_backend.split('extra_browser_')
|
||||
connection = list(
|
||||
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
|
||||
if connection:
|
||||
prefer_fetch_backend = 'html_webdriver'
|
||||
custom_browser_connection_url = connection[0].get('browser_connection_url')
|
||||
|
||||
# PDF should be html_requests because playwright will serve it up (so far) in a embedded page
|
||||
# @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
|
||||
# @todo needs test to or a fix
|
||||
if self.watch.is_pdf:
|
||||
prefer_fetch_backend = "html_requests"
|
||||
|
||||
# Grab the right kind of 'fetcher', (playwright, requests, etc)
|
||||
from changedetectionio import content_fetchers
|
||||
if hasattr(content_fetchers, prefer_fetch_backend):
|
||||
# @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
|
||||
if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
|
||||
# This is never supported in selenium anyway
|
||||
logger.warning(
|
||||
"Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
|
||||
from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
|
||||
fetcher_obj = playwright_fetcher
|
||||
else:
|
||||
fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
|
||||
else:
|
||||
# What it referenced doesnt exist, Just use a default
|
||||
fetcher_obj = getattr(content_fetchers, "html_requests")
|
||||
|
||||
proxy_url = None
|
||||
if preferred_proxy_id:
|
||||
# Custom browser endpoints should NOT have a proxy added
|
||||
if not prefer_fetch_backend.startswith('extra_browser_'):
|
||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
||||
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
||||
else:
|
||||
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
|
||||
|
||||
logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")
|
||||
|
||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
||||
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
||||
custom_browser_connection_url=custom_browser_connection_url,
|
||||
screenshot_format=self.screenshot_format
|
||||
)
|
||||
|
||||
if self.watch.has_browser_steps:
|
||||
self.fetcher.browser_steps = self.watch.get('browser_steps', [])
|
||||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
request_headers = CaseInsensitiveDict()
|
||||
|
||||
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
||||
if ua and ua.get(prefer_fetch_backend):
|
||||
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
|
||||
|
||||
request_headers.update(self.watch.get('headers', {}))
|
||||
request_headers.update(self.datastore.get_all_base_headers())
|
||||
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
|
||||
|
||||
# https://github.com/psf/requests/issues/4525
|
||||
# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
|
||||
# do this by accident.
|
||||
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
|
||||
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
||||
|
||||
for header_name in request_headers:
|
||||
request_headers.update({header_name: jinja_render(template_str=request_headers.get(header_name))})
|
||||
|
||||
timeout = self.datastore.data['settings']['requests'].get('timeout')
|
||||
|
||||
request_body = self.watch.get('body')
|
||||
if request_body:
|
||||
request_body = jinja_render(template_str=self.watch.get('body'))
|
||||
|
||||
request_method = self.watch.get('method')
|
||||
ignore_status_codes = self.watch.get('ignore_status_codes', False)
|
||||
|
||||
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
||||
system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
|
||||
if self.watch.get('webdriver_delay'):
|
||||
self.fetcher.render_extract_delay = self.watch.get('webdriver_delay')
|
||||
elif system_webdriver_delay is not None:
|
||||
self.fetcher.render_extract_delay = system_webdriver_delay
|
||||
|
||||
if self.watch.get('webdriver_js_execute_code') is not None and self.watch.get('webdriver_js_execute_code').strip():
|
||||
self.fetcher.webdriver_js_execute_code = self.watch.get('webdriver_js_execute_code')
|
||||
|
||||
# Requests for PDF's, images etc should be passwd the is_binary flag
|
||||
is_binary = self.watch.is_pdf
|
||||
|
||||
# And here we go! call the right browser with browser-specific settings
|
||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||
# All fetchers are now async
|
||||
await self.fetcher.run(
|
||||
current_include_filters=self.watch.get('include_filters'),
|
||||
empty_pages_are_a_change=empty_pages_are_a_change,
|
||||
fetch_favicon=self.watch.favicon_is_expired(),
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
is_binary=is_binary,
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
screenshot_format=self.screenshot_format,
|
||||
timeout=timeout,
|
||||
url=url,
|
||||
watch_uuid=self.watch_uuid,
|
||||
)
|
||||
|
||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||
await self.fetcher.quit(watch=self.watch)
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
def get_extra_watch_config(self, filename):
|
||||
"""
|
||||
Read processor-specific JSON config file from watch data directory.
|
||||
|
||||
Args:
|
||||
filename: Name of JSON file (e.g., "visual_ssim_score.json")
|
||||
|
||||
Returns:
|
||||
dict: Parsed JSON data, or empty dict if file doesn't exist
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
|
||||
if not watch_data_dir:
|
||||
return {}
|
||||
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
|
||||
if not os.path.isfile(filepath):
|
||||
return {}
|
||||
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.warning(f"Failed to read extra watch config {filename}: {e}")
|
||||
return {}
|
||||
|
||||
def update_extra_watch_config(self, filename, data, merge=True):
|
||||
"""
|
||||
Write processor-specific JSON config file to watch data directory.
|
||||
|
||||
Args:
|
||||
filename: Name of JSON file (e.g., "visual_ssim_score.json")
|
||||
data: Dictionary to serialize as JSON
|
||||
merge: If True, merge with existing data; if False, overwrite completely
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
|
||||
if not watch_data_dir:
|
||||
logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
|
||||
return
|
||||
|
||||
# Ensure directory exists
|
||||
watch.ensure_data_dir_exists()
|
||||
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
|
||||
try:
|
||||
# If merge is enabled, read existing data first
|
||||
existing_data = {}
|
||||
if merge and os.path.isfile(filepath):
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
existing_data = json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.warning(f"Failed to read existing config for merge: {e}")
|
||||
|
||||
# Merge new data with existing
|
||||
if merge:
|
||||
existing_data.update(data)
|
||||
data_to_save = existing_data
|
||||
else:
|
||||
data_to_save = data
|
||||
|
||||
# Write the data
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump(data_to_save, f, indent=2)
|
||||
except IOError as e:
|
||||
logger.error(f"Failed to write extra watch config {filename}: {e}")
|
||||
|
||||
@abstractmethod
|
||||
def run_changedetection(self, watch):
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
some_data = 'xxxxx'
|
||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||
changed_detected = False
|
||||
return changed_detected, update_obj, ''.encode('utf-8')
|
||||
131
changedetectionio/processors/extract.py
Normal file
131
changedetectionio/processors/extract.py
Normal file
@@ -0,0 +1,131 @@
|
||||
"""
|
||||
Base data extraction module for all processors.
|
||||
|
||||
This module handles extracting data from watch history using regex patterns
|
||||
and exporting to CSV format. This is the default extractor that all processors
|
||||
(text_json_diff, restock_diff, etc.) can use by default or override.
|
||||
"""
|
||||
|
||||
import os
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def render_form(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
|
||||
"""
|
||||
Render the data extraction form.
|
||||
|
||||
Args:
|
||||
watch: The watch object
|
||||
datastore: The ChangeDetectionStore instance
|
||||
request: Flask request object
|
||||
url_for: Flask url_for function
|
||||
render_template: Flask render_template function
|
||||
flash: Flask flash function
|
||||
redirect: Flask redirect function
|
||||
extract_form: Optional pre-built extract form (for error cases)
|
||||
|
||||
Returns:
|
||||
Rendered HTML response with the extraction form
|
||||
"""
|
||||
from changedetectionio import forms
|
||||
|
||||
uuid = watch.get('uuid')
|
||||
|
||||
# Use provided form or create a new one
|
||||
if extract_form is None:
|
||||
extract_form = forms.extractDataForm(
|
||||
formdata=request.form,
|
||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
||||
)
|
||||
|
||||
# Get error information for the template
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
|
||||
|
||||
# Use the shared default template from processors/templates/
|
||||
# Processors can override this by creating their own extract.py with custom template logic
|
||||
output = render_template(
|
||||
"extract.html",
|
||||
uuid=uuid,
|
||||
extract_form=extract_form,
|
||||
watch_a=watch,
|
||||
last_error=watch['last_error'],
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
screenshot=screenshot_url,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
||||
extra_title=f" - {watch.label} - Extract Data",
|
||||
extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
|
||||
pure_menu_fixed=False
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def process_extraction(watch, datastore, request, url_for, make_response, send_from_directory, flash, redirect, extract_form=None):
|
||||
"""
|
||||
Process the data extraction request and return CSV file.
|
||||
|
||||
Args:
|
||||
watch: The watch object
|
||||
datastore: The ChangeDetectionStore instance
|
||||
request: Flask request object
|
||||
url_for: Flask url_for function
|
||||
make_response: Flask make_response function
|
||||
send_from_directory: Flask send_from_directory function
|
||||
flash: Flask flash function
|
||||
redirect: Flask redirect function
|
||||
extract_form: Optional pre-built extract form
|
||||
|
||||
Returns:
|
||||
CSV file download response or redirect to form on error
|
||||
"""
|
||||
from changedetectionio import forms
|
||||
|
||||
uuid = watch.get('uuid')
|
||||
|
||||
# Use provided form or create a new one
|
||||
if extract_form is None:
|
||||
extract_form = forms.extractDataForm(
|
||||
formdata=request.form,
|
||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
||||
)
|
||||
|
||||
if not extract_form.validate():
|
||||
flash("An error occurred, please see below.", "error")
|
||||
# render_template needs to be imported from Flask for this to work
|
||||
from flask import render_template as flask_render_template
|
||||
return render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=flask_render_template,
|
||||
flash=flash,
|
||||
redirect=redirect,
|
||||
extract_form=extract_form
|
||||
)
|
||||
|
||||
extract_regex = request.form.get('extract_regex', '').strip()
|
||||
output = watch.extract_regex_from_all_history(extract_regex)
|
||||
|
||||
if output:
|
||||
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
||||
response.headers['Content-type'] = 'text/csv'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
response.headers['Expires'] = "0"
|
||||
return response
|
||||
|
||||
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
|
||||
return redirect(url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid))
|
||||
210
changedetectionio/processors/image_ssim_diff/README.md
Normal file
210
changedetectionio/processors/image_ssim_diff/README.md
Normal file
@@ -0,0 +1,210 @@
|
||||
# Fast Screenshot Comparison Processor
|
||||
|
||||
Visual/screenshot change detection using ultra-fast image comparison algorithms.
|
||||
|
||||
## Overview
|
||||
|
||||
This processor uses **OpenCV** by default for screenshot comparison, providing **50-100x faster** performance compared to the previous SSIM implementation while still detecting meaningful visual changes.
|
||||
|
||||
## Current Features
|
||||
|
||||
- **Ultra-fast OpenCV comparison**: cv2.absdiff with Gaussian blur for noise reduction
|
||||
- **MD5 pre-check**: Fast identical image detection before expensive comparison
|
||||
- **Configurable sensitivity**: Threshold-based change detection
|
||||
- **Three-panel diff view**: Previous | Current | Difference (with red highlights)
|
||||
- **Direct image support**: Works with browser screenshots AND direct image URLs
|
||||
- **Visual selector support**: Compare specific page regions using CSS/XPath selectors
|
||||
- **Download images**: Download any of the three comparison images directly from the diff view
|
||||
|
||||
## Performance
|
||||
|
||||
- **OpenCV (default)**: 50-100x faster than SSIM
|
||||
- **Large screenshots**: Automatic downscaling for diff visualization (configurable via `MAX_DIFF_HEIGHT`/`MAX_DIFF_WIDTH`)
|
||||
- **Memory efficient**: Explicit cleanup of large objects for long-running processes
|
||||
- **JPEG diff images**: Smaller file sizes, faster rendering
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Fetch**: Screenshot captured via browser OR direct image URL fetched
|
||||
2. **MD5 Check**: Quick hash comparison - if identical, skip comparison
|
||||
3. **Region Selection** (optional): Crop to specific page region if visual selector is configured
|
||||
4. **OpenCV Comparison**: Fast pixel-level difference detection with Gaussian blur
|
||||
5. **Change Detection**: Percentage of changed pixels above threshold = change detected
|
||||
6. **Visualization**: Generate diff image with red-highlighted changed regions
|
||||
|
||||
## Architecture
|
||||
|
||||
### Default Method: OpenCV
|
||||
|
||||
The processor uses OpenCV's `cv2.absdiff()` for ultra-fast pixel-level comparison:
|
||||
|
||||
```python
|
||||
# Convert to grayscale
|
||||
gray_from = cv2.cvtColor(image_from, cv2.COLOR_RGB2GRAY)
|
||||
gray_to = cv2.cvtColor(image_to, cv2.COLOR_RGB2GRAY)
|
||||
|
||||
# Apply Gaussian blur (reduces noise, controlled by OPENCV_BLUR_SIGMA env var)
|
||||
gray_from = cv2.GaussianBlur(gray_from, (0, 0), sigma=0.8)
|
||||
gray_to = cv2.GaussianBlur(gray_to, (0, 0), sigma=0.8)
|
||||
|
||||
# Calculate absolute difference
|
||||
diff = cv2.absdiff(gray_from, gray_to)
|
||||
|
||||
# Apply threshold (default: 30)
|
||||
_, thresh = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Count changed pixels
|
||||
change_percentage = (changed_pixels / total_pixels) * 100
|
||||
```
|
||||
|
||||
### Optional: Pixelmatch
|
||||
|
||||
For users who need better anti-aliasing detection (especially for text-heavy screenshots), **pixelmatch** can be optionally installed:
|
||||
|
||||
```bash
|
||||
pip install pybind11-pixelmatch>=0.1.3
|
||||
```
|
||||
|
||||
**Note**: Pixelmatch uses a C++17 implementation via pybind11 and may have build issues on some platforms (particularly Alpine/musl systems with symbolic link security restrictions). The application will automatically fall back to OpenCV if pixelmatch is not available.
|
||||
|
||||
To use pixelmatch instead of OpenCV, set the environment variable:
|
||||
```bash
|
||||
COMPARISON_METHOD=pixelmatch
|
||||
```
|
||||
|
||||
#### When to use pixelmatch:
|
||||
- Screenshots with lots of text and anti-aliasing
|
||||
- Need to ignore minor font rendering differences between browser versions
|
||||
- 10-20x faster than SSIM (but slower than OpenCV)
|
||||
|
||||
#### When to stick with OpenCV (default):
|
||||
- General webpage monitoring
|
||||
- Maximum performance (50-100x faster than SSIM)
|
||||
- Simple pixel-level change detection
|
||||
- Avoid build dependencies (Alpine/musl systems)
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
```bash
|
||||
# Comparison method (opencv or pixelmatch)
|
||||
COMPARISON_METHOD=opencv # Default
|
||||
|
||||
# OpenCV threshold (0-255, lower = more sensitive)
|
||||
COMPARISON_THRESHOLD_OPENCV=30 # Default
|
||||
|
||||
# Pixelmatch threshold (0-100, mapped to 0-1 scale)
|
||||
COMPARISON_THRESHOLD_PIXELMATCH=10 # Default
|
||||
|
||||
# Gaussian blur sigma for OpenCV (0 = no blur, higher = more blur)
|
||||
OPENCV_BLUR_SIGMA=0.8 # Default
|
||||
|
||||
# Minimum change percentage to trigger detection
|
||||
OPENCV_MIN_CHANGE_PERCENT=0.1 # Default (0.1%)
|
||||
PIXELMATCH_MIN_CHANGE_PERCENT=0.1 # Default
|
||||
|
||||
# Diff visualization image size limits (pixels)
|
||||
MAX_DIFF_HEIGHT=8000 # Default
|
||||
MAX_DIFF_WIDTH=900 # Default
|
||||
```
|
||||
|
||||
### Per-Watch Configuration
|
||||
|
||||
- **Comparison Threshold**: Can be configured per-watch in the edit form
|
||||
- Very low sensitivity (10) - Only major changes
|
||||
- Low sensitivity (20) - Significant changes
|
||||
- Medium sensitivity (30) - Moderate changes (default)
|
||||
- High sensitivity (50) - Small changes
|
||||
- Very high sensitivity (75) - Any visible change
|
||||
|
||||
### Visual Selector (Region Comparison)
|
||||
|
||||
Use the "Include filters" field with CSS selectors or XPath to compare only specific page regions:
|
||||
|
||||
```
|
||||
.content-area
|
||||
//div[@id='main']
|
||||
```
|
||||
|
||||
The processor will automatically crop both screenshots to the bounding box of the first matched element.
|
||||
|
||||
## Dependencies
|
||||
|
||||
### Required
|
||||
- `opencv-python-headless>=4.8.0.76` - Fast image comparison
|
||||
- `Pillow (PIL)` - Image loading and manipulation
|
||||
- `numpy` - Array operations
|
||||
|
||||
### Optional
|
||||
- `pybind11-pixelmatch>=0.1.3` - Alternative comparison method with anti-aliasing detection
|
||||
|
||||
## Change Detection Interpretation
|
||||
|
||||
- **0%** = Identical images (or below minimum change threshold)
|
||||
- **0.1-1%** = Minor differences (anti-aliasing, slight rendering differences)
|
||||
- **1-5%** = Noticeable changes (text updates, small content changes)
|
||||
- **5-20%** = Significant changes (layout shifts, content additions)
|
||||
- **>20%** = Major differences (page redesign, large content changes)
|
||||
|
||||
## Technical Notes
|
||||
|
||||
### Memory Management
|
||||
```python
|
||||
# Explicit cleanup for long-running processes
|
||||
img.close() # Close PIL Images
|
||||
buffer.close() # Close BytesIO buffers
|
||||
del large_array # Mark numpy arrays for GC
|
||||
```
|
||||
|
||||
### Diff Image Generation
|
||||
- Format: JPEG (quality=85, optimized)
|
||||
- Highlight: Red overlay (50% blend with original)
|
||||
- Auto-downscaling: Large screenshots downscaled for faster rendering
|
||||
- Base64 embedded: For direct template rendering
|
||||
|
||||
### OpenCV Blur Parameters
|
||||
The Gaussian blur reduces sensitivity to:
|
||||
- Font rendering differences
|
||||
- Anti-aliasing variations
|
||||
- JPEG compression artifacts
|
||||
- Minor pixel shifts (1-2 pixels)
|
||||
|
||||
Increase `OPENCV_BLUR_SIGMA` to make comparison more tolerant of these differences.
|
||||
|
||||
## Comparison: OpenCV vs Pixelmatch vs SSIM
|
||||
|
||||
| Feature | OpenCV | Pixelmatch | SSIM (old) |
|
||||
|---------|--------|------------|------------|
|
||||
| **Speed** | 50-100x faster | 10-20x faster | Baseline |
|
||||
| **Anti-aliasing** | Via blur | Built-in detection | Built-in |
|
||||
| **Text sensitivity** | High | Medium (AA-aware) | Medium |
|
||||
| **Dependencies** | opencv-python-headless | pybind11-pixelmatch + C++ compiler | scikit-image |
|
||||
| **Alpine/musl support** | ✅ Yes | ⚠️ Build issues | ✅ Yes |
|
||||
| **Memory usage** | Low | Low | High |
|
||||
| **Best for** | General use, max speed | Text-heavy screenshots | Deprecated |
|
||||
|
||||
## Migration from SSIM
|
||||
|
||||
If you're upgrading from the old SSIM-based processor:
|
||||
|
||||
1. **Thresholds are different**: SSIM used 0-1 scale (higher = more similar), OpenCV uses 0-255 pixel difference (lower = more similar)
|
||||
2. **Default threshold**: Start with 30 for OpenCV, adjust based on your needs
|
||||
3. **Performance**: Expect dramatically faster comparisons, especially for large screenshots
|
||||
4. **Accuracy**: OpenCV is more sensitive to pixel-level changes; increase `OPENCV_BLUR_SIGMA` if you're getting false positives
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
Potential features for future consideration:
|
||||
|
||||
- **Change region detection**: Highlight specific areas that changed with bounding boxes
|
||||
- **Perceptual hashing**: Pre-screening filter for even faster checks
|
||||
- **Ignore regions**: Exclude specific page areas (ads, timestamps) from comparison
|
||||
- **Text extraction**: OCR-based text comparison for semantic changes
|
||||
- **Adaptive thresholds**: Different sensitivity for different page regions
|
||||
|
||||
## Resources
|
||||
|
||||
- [OpenCV Documentation](https://docs.opencv.org/)
|
||||
- [pybind11-pixelmatch GitHub](https://github.com/whtsky/pybind11-pixelmatch)
|
||||
- [Pixelmatch (original JS library)](https://github.com/mapbox/pixelmatch)
|
||||
32
changedetectionio/processors/image_ssim_diff/__init__.py
Normal file
32
changedetectionio/processors/image_ssim_diff/__init__.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""
|
||||
Visual/screenshot change detection using fast image comparison algorithms.
|
||||
|
||||
This processor compares screenshots using OpenCV (cv2.absdiff),
|
||||
which is 10-100x faster than SSIM while still detecting meaningful visual changes.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
processor_description = "Visual/Screenshot change detection (Fast)"
|
||||
processor_name = "image_ssim_diff"
|
||||
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
|
||||
|
||||
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"
|
||||
|
||||
# Subprocess timeout settings
|
||||
# Maximum time to wait for subprocess operations (seconds)
|
||||
POLL_TIMEOUT_ABSOLUTE = int(os.getenv('OPENCV_SUBPROCESS_TIMEOUT', '20'))
|
||||
|
||||
# Template tracking filename
|
||||
CROPPED_IMAGE_TEMPLATE_FILENAME = 'cropped_image_template.png'
|
||||
|
||||
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS = [
|
||||
('200', 'Low sensitivity (only major changes)'),
|
||||
('80', 'Medium sensitivity (moderate changes - recommended)'),
|
||||
('20', 'High sensitivity (small changes)'),
|
||||
('0', 'Very high sensitivity (any change)')
|
||||
]
|
||||
|
||||
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT=0.999
|
||||
OPENCV_BLUR_SIGMA=float(os.getenv("OPENCV_BLUR_SIGMA", "3.0"))
|
||||
440
changedetectionio/processors/image_ssim_diff/difference.py
Normal file
440
changedetectionio/processors/image_ssim_diff/difference.py
Normal file
@@ -0,0 +1,440 @@
|
||||
"""
|
||||
Screenshot diff visualization for fast image comparison processor.
|
||||
|
||||
All image operations now use ImageDiffHandler abstraction for clean separation
|
||||
of concerns and easy backend swapping (LibVIPS, OpenCV, PIL, etc.).
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT, PROCESSOR_CONFIG_NAME, \
|
||||
OPENCV_BLUR_SIGMA
|
||||
|
||||
# All image operations now use OpenCV via isolated_opencv subprocess handler
|
||||
# No direct handler imports needed - subprocess isolation handles everything
|
||||
|
||||
# Maximum dimensions for diff visualization (can be overridden via environment variable)
|
||||
# Large screenshots don't need full resolution for visual inspection
|
||||
# Reduced defaults to minimize memory usage - 2000px height is plenty for diff viewing
|
||||
MAX_DIFF_HEIGHT = int(os.getenv('MAX_DIFF_HEIGHT', '8000'))
|
||||
MAX_DIFF_WIDTH = int(os.getenv('MAX_DIFF_WIDTH', '900'))
|
||||
|
||||
|
||||
def get_asset(asset_name, watch, datastore, request):
|
||||
"""
|
||||
Get processor-specific binary assets for streaming.
|
||||
|
||||
Uses ImageDiffHandler for all image operations - no more multiprocessing needed
|
||||
as LibVIPS handles threading/memory internally.
|
||||
|
||||
Supported assets:
|
||||
- 'before': The previous/from screenshot
|
||||
- 'after': The current/to screenshot
|
||||
- 'rendered_diff': The generated diff visualization with red highlights
|
||||
|
||||
Args:
|
||||
asset_name: Name of the asset to retrieve ('before', 'after', 'rendered_diff')
|
||||
watch: Watch object
|
||||
datastore: Datastore object
|
||||
request: Flask request (for from_version/to_version query params)
|
||||
|
||||
Returns:
|
||||
tuple: (binary_data, content_type, cache_control_header) or None if not found
|
||||
"""
|
||||
# Get version parameters from query string
|
||||
versions = list(watch.history.keys())
|
||||
|
||||
if len(versions) < 2:
|
||||
return None
|
||||
|
||||
from_version = request.args.get('from_version', versions[-2] if len(versions) >= 2 else versions[0])
|
||||
to_version = request.args.get('to_version', versions[-1])
|
||||
|
||||
# Validate versions exist
|
||||
if from_version not in versions:
|
||||
from_version = versions[-2] if len(versions) >= 2 else versions[0]
|
||||
if to_version not in versions:
|
||||
to_version = versions[-1]
|
||||
|
||||
try:
|
||||
if asset_name == 'before':
|
||||
# Return the 'from' screenshot with bounding box if configured
|
||||
img_bytes = watch.get_history_snapshot(timestamp=from_version)
|
||||
img_bytes = _draw_bounding_box_if_configured(img_bytes, watch, datastore)
|
||||
mime_type = _detect_mime_type(img_bytes)
|
||||
return (img_bytes, mime_type, 'public, max-age=3600')
|
||||
|
||||
elif asset_name == 'after':
|
||||
# Return the 'to' screenshot with bounding box if configured
|
||||
img_bytes = watch.get_history_snapshot(timestamp=to_version)
|
||||
img_bytes = _draw_bounding_box_if_configured(img_bytes, watch, datastore)
|
||||
mime_type = _detect_mime_type(img_bytes)
|
||||
return (img_bytes, mime_type, 'public, max-age=3600')
|
||||
|
||||
elif asset_name == 'rendered_diff':
|
||||
# Generate diff in isolated subprocess to prevent memory leaks
|
||||
# Subprocess provides complete memory isolation
|
||||
from .image_handler import isolated_opencv as process_screenshot_handler
|
||||
|
||||
img_bytes_from = watch.get_history_snapshot(timestamp=from_version)
|
||||
img_bytes_to = watch.get_history_snapshot(timestamp=to_version)
|
||||
|
||||
# Get pixel difference threshold sensitivity (per-watch > global)
|
||||
# This controls how different a pixel must be (0-255 scale) to count as "changed"
|
||||
from changedetectionio import processors
|
||||
processor_instance = processors.difference_detection_processor(datastore, watch.get('uuid'))
|
||||
processor_config = processor_instance.get_extra_watch_config(PROCESSOR_CONFIG_NAME)
|
||||
|
||||
pixel_difference_threshold_sensitivity = processor_config.get('pixel_difference_threshold_sensitivity')
|
||||
if not pixel_difference_threshold_sensitivity:
|
||||
pixel_difference_threshold_sensitivity = datastore.data['settings']['application'].get(
|
||||
'pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
|
||||
try:
|
||||
pixel_difference_threshold_sensitivity = int(pixel_difference_threshold_sensitivity)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(
|
||||
f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
|
||||
pixel_difference_threshold_sensitivity = SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
||||
|
||||
logger.debug(f"Pixel difference threshold sensitivity is {pixel_difference_threshold_sensitivity}")
|
||||
|
||||
|
||||
# Generate diff in isolated subprocess (async-safe)
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
# Async-safe wrapper: runs coroutine in new thread with its own event loop
|
||||
def run_async_in_thread():
|
||||
return asyncio.run(
|
||||
process_screenshot_handler.generate_diff_isolated(
|
||||
img_bytes_from,
|
||||
img_bytes_to,
|
||||
pixel_difference_threshold=int(pixel_difference_threshold_sensitivity),
|
||||
blur_sigma=OPENCV_BLUR_SIGMA,
|
||||
max_width=MAX_DIFF_WIDTH,
|
||||
max_height=MAX_DIFF_HEIGHT
|
||||
)
|
||||
)
|
||||
|
||||
# Run in thread to avoid blocking event loop if called from async context
|
||||
result_container = [None]
|
||||
exception_container = [None]
|
||||
|
||||
def thread_target():
|
||||
try:
|
||||
result_container[0] = run_async_in_thread()
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
if exception_container[0]:
|
||||
raise exception_container[0]
|
||||
|
||||
diff_image_bytes = result_container[0]
|
||||
|
||||
if diff_image_bytes:
|
||||
# Note: Bounding box drawing on diff not yet implemented
|
||||
return (diff_image_bytes, 'image/jpeg', 'public, max-age=300')
|
||||
else:
|
||||
logger.error("Failed to generate diff in subprocess")
|
||||
return None
|
||||
|
||||
else:
|
||||
# Unknown asset
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get asset '{asset_name}': {e}")
|
||||
import traceback
|
||||
logger.error(traceback.format_exc())
|
||||
return None
|
||||
|
||||
|
||||
def _detect_mime_type(img_bytes):
|
||||
"""
|
||||
Detect MIME type using puremagic (same as Watch.py).
|
||||
|
||||
Args:
|
||||
img_bytes: Image bytes
|
||||
|
||||
Returns:
|
||||
str: MIME type (e.g., 'image/png', 'image/jpeg')
|
||||
"""
|
||||
try:
|
||||
import puremagic
|
||||
detections = puremagic.magic_string(img_bytes[:2048])
|
||||
if detections:
|
||||
mime_type = detections[0].mime_type
|
||||
logger.trace(f"Detected MIME type: {mime_type}")
|
||||
return mime_type
|
||||
else:
|
||||
logger.trace("No MIME type detected, using 'image/png' fallback")
|
||||
return 'image/png'
|
||||
except Exception as e:
|
||||
logger.warning(f"puremagic detection failed: {e}, using 'image/png' fallback")
|
||||
return 'image/png'
|
||||
|
||||
|
||||
def _draw_bounding_box_if_configured(img_bytes, watch, datastore):
|
||||
"""
|
||||
Draw blue bounding box on image if configured in processor settings.
|
||||
Uses isolated subprocess to prevent memory leaks from large images.
|
||||
|
||||
Supports two modes:
|
||||
- "Select by element": Use include_filter to find xpath element bbox
|
||||
- "Draw area": Use manually drawn bounding_box from config
|
||||
|
||||
Args:
|
||||
img_bytes: Image bytes (PNG)
|
||||
watch: Watch object
|
||||
datastore: Datastore object
|
||||
|
||||
Returns:
|
||||
Image bytes (possibly with bounding box drawn)
|
||||
"""
|
||||
try:
|
||||
# Get processor configuration
|
||||
from changedetectionio import processors
|
||||
processor_instance = processors.difference_detection_processor(datastore, watch.get('uuid'))
|
||||
processor_name = watch.get('processor', 'default')
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||
|
||||
if not processor_config:
|
||||
return img_bytes
|
||||
|
||||
selection_mode = processor_config.get('selection_mode', 'draw')
|
||||
x, y, width, height = None, None, None, None
|
||||
|
||||
# Mode 1: Select by element (use include_filter + xpath_data)
|
||||
if selection_mode == 'element':
|
||||
include_filters = watch.get('include_filters', [])
|
||||
|
||||
if include_filters and len(include_filters) > 0:
|
||||
first_filter = include_filters[0].strip()
|
||||
|
||||
# Get xpath_data from watch history
|
||||
history_keys = list(watch.history.keys())
|
||||
if history_keys:
|
||||
latest_snapshot = watch.get_history_snapshot(timestamp=history_keys[-1])
|
||||
xpath_data_path = watch.get_xpath_data_filepath(timestamp=history_keys[-1])
|
||||
|
||||
try:
|
||||
import gzip
|
||||
with gzip.open(xpath_data_path, 'rt') as f:
|
||||
xpath_data = json.load(f)
|
||||
|
||||
# Find matching element
|
||||
for element in xpath_data.get('size_pos', []):
|
||||
if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'):
|
||||
x = element.get('left', 0)
|
||||
y = element.get('top', 0)
|
||||
width = element.get('width', 0)
|
||||
height = element.get('height', 0)
|
||||
logger.debug(f"Found element bbox for filter '{first_filter}': x={x}, y={y}, w={width}, h={height}")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load xpath_data for element selection: {e}")
|
||||
|
||||
# Mode 2: Draw area (use manually configured bbox)
|
||||
else:
|
||||
bounding_box = processor_config.get('bounding_box')
|
||||
if bounding_box:
|
||||
# Parse bounding box: "x,y,width,height"
|
||||
parts = [int(p.strip()) for p in bounding_box.split(',')]
|
||||
if len(parts) == 4:
|
||||
x, y, width, height = parts
|
||||
else:
|
||||
logger.warning(f"Invalid bounding box format: {bounding_box}")
|
||||
|
||||
# If no bbox found, return original image
|
||||
if x is None or y is None or width is None or height is None:
|
||||
return img_bytes
|
||||
|
||||
# Use isolated subprocess to prevent memory leaks from large images
|
||||
from .image_handler import isolated_opencv
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
# Async-safe wrapper: runs coroutine in new thread with its own event loop
|
||||
# This prevents blocking when called from async context (update worker)
|
||||
def run_async_in_thread():
|
||||
return asyncio.run(
|
||||
isolated_opencv.draw_bounding_box_isolated(
|
||||
img_bytes, x, y, width, height,
|
||||
color=(255, 0, 0), # Blue in BGR format
|
||||
thickness=3
|
||||
)
|
||||
)
|
||||
|
||||
# Always run in thread to avoid blocking event loop if called from async context
|
||||
result_container = [None]
|
||||
exception_container = [None]
|
||||
|
||||
def thread_target():
|
||||
try:
|
||||
result_container[0] = run_async_in_thread()
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join(timeout=15)
|
||||
|
||||
if exception_container[0]:
|
||||
raise exception_container[0]
|
||||
|
||||
result = result_container[0]
|
||||
|
||||
# Return result or original if subprocess failed
|
||||
return result if result else img_bytes
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to draw bounding box: {e}")
|
||||
import traceback
|
||||
logger.debug(traceback.format_exc())
|
||||
return img_bytes
|
||||
|
||||
|
||||
def render(watch, datastore, request, url_for, render_template, flash, redirect):
|
||||
"""
|
||||
Render the screenshot comparison diff page.
|
||||
|
||||
Uses ImageDiffHandler for all image operations.
|
||||
|
||||
Args:
|
||||
watch: Watch object
|
||||
datastore: Datastore object
|
||||
request: Flask request
|
||||
url_for: Flask url_for function
|
||||
render_template: Flask render_template function
|
||||
flash: Flask flash function
|
||||
redirect: Flask redirect function
|
||||
|
||||
Returns:
|
||||
Rendered template or redirect
|
||||
"""
|
||||
# Get version parameters (from_version, to_version)
|
||||
versions = list(watch.history.keys())
|
||||
|
||||
if len(versions) < 2:
|
||||
flash("Not enough history to compare. Need at least 2 snapshots.", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Default: compare latest two versions
|
||||
from_version = request.args.get('from_version', versions[-2] if len(versions) >= 2 else versions[0])
|
||||
to_version = request.args.get('to_version', versions[-1])
|
||||
|
||||
# Validate versions exist
|
||||
if from_version not in versions:
|
||||
from_version = versions[-2] if len(versions) >= 2 else versions[0]
|
||||
if to_version not in versions:
|
||||
to_version = versions[-1]
|
||||
|
||||
# Get pixel difference threshold sensitivity (per-watch > global > env default)
|
||||
pixel_difference_threshold_sensitivity = watch.get('pixel_difference_threshold_sensitivity')
|
||||
if not pixel_difference_threshold_sensitivity or pixel_difference_threshold_sensitivity == '':
|
||||
pixel_difference_threshold_sensitivity = datastore.data['settings']['application'].get('pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
|
||||
|
||||
# Convert to appropriate type
|
||||
try:
|
||||
pixel_difference_threshold_sensitivity = float(pixel_difference_threshold_sensitivity)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
|
||||
pixel_difference_threshold_sensitivity = 30.0
|
||||
|
||||
# Get blur sigma
|
||||
blur_sigma = OPENCV_BLUR_SIGMA
|
||||
|
||||
# Load screenshots from history
|
||||
try:
|
||||
img_bytes_from = watch.get_history_snapshot(timestamp=from_version)
|
||||
img_bytes_to = watch.get_history_snapshot(timestamp=to_version)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load screenshots: {e}")
|
||||
flash(f"Failed to load screenshots: {e}", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Calculate change percentage using isolated subprocess to prevent memory leaks (async-safe)
|
||||
now = time.time()
|
||||
try:
|
||||
from .image_handler import isolated_opencv as process_screenshot_handler
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
# Async-safe wrapper: runs coroutine in new thread with its own event loop
|
||||
def run_async_in_thread():
|
||||
return asyncio.run(
|
||||
process_screenshot_handler.calculate_change_percentage_isolated(
|
||||
img_bytes_from,
|
||||
img_bytes_to,
|
||||
pixel_difference_threshold=int(pixel_difference_threshold_sensitivity),
|
||||
blur_sigma=blur_sigma,
|
||||
max_width=MAX_DIFF_WIDTH,
|
||||
max_height=MAX_DIFF_HEIGHT
|
||||
)
|
||||
)
|
||||
|
||||
# Run in thread to avoid blocking event loop if called from async context
|
||||
result_container = [None]
|
||||
exception_container = [None]
|
||||
|
||||
def thread_target():
|
||||
try:
|
||||
result_container[0] = run_async_in_thread()
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
if exception_container[0]:
|
||||
raise exception_container[0]
|
||||
|
||||
change_percentage = result_container[0]
|
||||
|
||||
method_display = f"{process_screenshot_handler.IMPLEMENTATION_NAME} (pixel_diff_threshold: {pixel_difference_threshold_sensitivity:.0f})"
|
||||
logger.debug(f"Done change percentage calculation in {time.time() - now:.2f}s")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to calculate change percentage: {e}")
|
||||
import traceback
|
||||
logger.error(traceback.format_exc())
|
||||
flash(f"Failed to calculate diff: {e}", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Load historical data if available (for charts/visualization)
|
||||
comparison_data = {}
|
||||
comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
|
||||
if os.path.isfile(comparison_config_path):
|
||||
try:
|
||||
with open(comparison_config_path, 'r') as f:
|
||||
comparison_data = json.load(f)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load comparison history data: {e}")
|
||||
|
||||
# Render custom template
|
||||
# Template path is namespaced to avoid conflicts with other processors
|
||||
# Images are now served via separate /processor-asset/ endpoints instead of base64
|
||||
return render_template(
|
||||
'image_ssim_diff/diff.html',
|
||||
change_percentage=change_percentage,
|
||||
comparison_data=comparison_data, # Full history for charts/visualization
|
||||
comparison_method=method_display,
|
||||
current_diff_url=watch['url'],
|
||||
from_version=from_version,
|
||||
percentage_different=change_percentage,
|
||||
threshold=pixel_difference_threshold_sensitivity,
|
||||
to_version=to_version,
|
||||
uuid=watch.get('uuid'),
|
||||
versions=versions,
|
||||
watch=watch,
|
||||
)
|
||||
151
changedetectionio/processors/image_ssim_diff/edit_hook.py
Normal file
151
changedetectionio/processors/image_ssim_diff/edit_hook.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""
|
||||
Optional hook called when processor settings are saved in edit page.
|
||||
|
||||
This hook analyzes the selected region to determine if template matching
|
||||
should be enabled for tracking content movement.
|
||||
|
||||
Template matching is controlled via ENABLE_TEMPLATE_TRACKING env var (default: False).
|
||||
"""
|
||||
|
||||
import io
|
||||
import os
|
||||
from loguru import logger
|
||||
from changedetectionio import strtobool
|
||||
from . import CROPPED_IMAGE_TEMPLATE_FILENAME
|
||||
|
||||
# Template matching controlled via environment variable (default: disabled)
|
||||
# Set ENABLE_TEMPLATE_TRACKING=True to enable
|
||||
TEMPLATE_MATCHING_ENABLED = strtobool(os.getenv('ENABLE_TEMPLATE_TRACKING', 'False'))
|
||||
IMPORT_ERROR = "Template matching disabled (set ENABLE_TEMPLATE_TRACKING=True to enable)"
|
||||
|
||||
|
||||
def on_config_save(watch, processor_config, datastore):
|
||||
"""
|
||||
Called after processor config is saved in edit page.
|
||||
|
||||
Analyzes the bounding box region to determine if it has enough
|
||||
visual features (texture/edges) to enable template matching for
|
||||
tracking content movement when page layout shifts.
|
||||
|
||||
Args:
|
||||
watch: Watch object
|
||||
processor_config: Dict of processor-specific config
|
||||
datastore: Datastore object
|
||||
|
||||
Returns:
|
||||
dict: Updated processor_config with auto_track_region setting
|
||||
"""
|
||||
# Check if template matching is globally enabled via ENV var
|
||||
if not TEMPLATE_MATCHING_ENABLED:
|
||||
logger.debug("Template tracking disabled via ENABLE_TEMPLATE_TRACKING env var")
|
||||
processor_config['auto_track_region'] = False
|
||||
return processor_config
|
||||
|
||||
bounding_box = processor_config.get('bounding_box')
|
||||
|
||||
if not bounding_box:
|
||||
# No bounding box, disable tracking
|
||||
processor_config['auto_track_region'] = False
|
||||
logger.debug("No bounding box set, disabled auto-tracking")
|
||||
return processor_config
|
||||
|
||||
try:
|
||||
# Get the latest screenshot from watch history
|
||||
history_keys = list(watch.history.keys())
|
||||
if len(history_keys) == 0:
|
||||
logger.warning("No screenshot history available yet, cannot analyze for tracking")
|
||||
processor_config['auto_track_region'] = False
|
||||
return processor_config
|
||||
|
||||
# Get latest screenshot
|
||||
latest_timestamp = history_keys[-1]
|
||||
screenshot_bytes = watch.get_history_snapshot(timestamp=latest_timestamp)
|
||||
|
||||
if not screenshot_bytes:
|
||||
logger.warning("Could not load screenshot for analysis")
|
||||
processor_config['auto_track_region'] = False
|
||||
return processor_config
|
||||
|
||||
# Parse bounding box
|
||||
parts = [int(p.strip()) for p in bounding_box.split(',')]
|
||||
if len(parts) != 4:
|
||||
logger.warning("Invalid bounding box format")
|
||||
processor_config['auto_track_region'] = False
|
||||
return processor_config
|
||||
|
||||
x, y, width, height = parts
|
||||
|
||||
# Analyze the region for features/texture
|
||||
has_enough_features = analyze_region_features(screenshot_bytes, x, y, width, height)
|
||||
|
||||
if has_enough_features:
|
||||
logger.info(f"Region has sufficient features for tracking - enabling auto_track_region")
|
||||
processor_config['auto_track_region'] = True
|
||||
|
||||
# Save the template as cropped.jpg in watch data directory
|
||||
save_template_to_file(watch, screenshot_bytes, x, y, width, height)
|
||||
|
||||
else:
|
||||
logger.info(f"Region lacks distinctive features - disabling auto_track_region")
|
||||
processor_config['auto_track_region'] = False
|
||||
|
||||
# Remove old template file if exists
|
||||
template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
|
||||
if os.path.exists(template_path):
|
||||
os.remove(template_path)
|
||||
logger.debug(f"Removed old template file: {template_path}")
|
||||
|
||||
return processor_config
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error analyzing region for tracking: {e}")
|
||||
processor_config['auto_track_region'] = False
|
||||
return processor_config
|
||||
|
||||
|
||||
def analyze_region_features(screenshot_bytes, x, y, width, height):
|
||||
"""
|
||||
Analyze if a region has enough visual features for template matching.
|
||||
|
||||
Uses OpenCV to detect corners/edges. If the region has distinctive
|
||||
features, template matching can reliably track it when it moves.
|
||||
|
||||
Args:
|
||||
screenshot_bytes: Full screenshot as bytes
|
||||
x, y, width, height: Bounding box coordinates
|
||||
|
||||
Returns:
|
||||
bool: True if region has enough features, False otherwise
|
||||
"""
|
||||
# Template matching disabled - would need OpenCV implementation for region analysis
|
||||
if not TEMPLATE_MATCHING_ENABLED:
|
||||
logger.warning(f"Cannot analyze region features: {IMPORT_ERROR}")
|
||||
return False
|
||||
|
||||
# Note: Original implementation used LibVIPS handler to crop region, then OpenCV
|
||||
# for feature detection (goodFeaturesToTrack, Canny edge detection, variance).
|
||||
# If re-implementing, use OpenCV directly for both cropping and analysis.
|
||||
# Feature detection would use: cv2.goodFeaturesToTrack, cv2.Canny, np.var
|
||||
return False
|
||||
|
||||
|
||||
def save_template_to_file(watch, screenshot_bytes, x, y, width, height):
|
||||
"""
|
||||
Extract the template region and save as cropped_image_template.png in watch data directory.
|
||||
|
||||
This is a convenience wrapper around handler.save_template() that handles
|
||||
watch directory setup and path construction.
|
||||
|
||||
Args:
|
||||
watch: Watch object
|
||||
screenshot_bytes: Full screenshot as bytes
|
||||
x, y, width, height: Bounding box coordinates
|
||||
"""
|
||||
# Template matching disabled - would need OpenCV implementation for template saving
|
||||
if not TEMPLATE_MATCHING_ENABLED:
|
||||
logger.warning(f"Cannot save template: {IMPORT_ERROR}")
|
||||
return
|
||||
|
||||
# Note: Original implementation used LibVIPS handler to crop and save region.
|
||||
# If re-implementing, use OpenCV (cv2.imdecode, crop with array slicing, cv2.imwrite).
|
||||
return
|
||||
120
changedetectionio/processors/image_ssim_diff/forms.py
Normal file
120
changedetectionio/processors/image_ssim_diff/forms.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""
|
||||
Configuration forms for fast screenshot comparison processor.
|
||||
"""
|
||||
|
||||
from wtforms import SelectField, StringField, validators, ValidationError, IntegerField
|
||||
from flask_babel import lazy_gettext as _l
|
||||
from changedetectionio.forms import processor_text_json_diff_form
|
||||
import re
|
||||
|
||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS
|
||||
|
||||
|
||||
def validate_bounding_box(form, field):
|
||||
"""Validate bounding box format: x,y,width,height with integers."""
|
||||
if not field.data:
|
||||
return # Optional field
|
||||
|
||||
if len(field.data) > 100:
|
||||
raise ValidationError(_l('Bounding box value is too long'))
|
||||
|
||||
# Should be comma-separated integers
|
||||
if not re.match(r'^\d+,\d+,\d+,\d+$', field.data):
|
||||
raise ValidationError(_l('Bounding box must be in format: x,y,width,height (integers only)'))
|
||||
|
||||
# Validate values are reasonable (not negative, not ridiculously large)
|
||||
parts = [int(p) for p in field.data.split(',')]
|
||||
for part in parts:
|
||||
if part < 0:
|
||||
raise ValidationError(_l('Bounding box values must be non-negative'))
|
||||
if part > 10000: # Reasonable max screen dimension
|
||||
raise ValidationError(_l('Bounding box values are too large'))
|
||||
|
||||
|
||||
def validate_selection_mode(form, field):
|
||||
"""Validate selection mode value."""
|
||||
if not field.data:
|
||||
return # Optional field
|
||||
|
||||
if field.data not in ['element', 'draw']:
|
||||
raise ValidationError(_l('Selection mode must be either "element" or "draw"'))
|
||||
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
|
||||
"""Form for fast image comparison processor settings."""
|
||||
|
||||
processor_config_min_change_percentage = IntegerField(
|
||||
_l('Minimum Change Percentage'),
|
||||
validators=[
|
||||
validators.Optional(),
|
||||
validators.NumberRange(min=1, max=100, message=_l('Must be between 0 and 100'))
|
||||
],
|
||||
render_kw={"placeholder": "Use global default (0.1)"}
|
||||
)
|
||||
|
||||
processor_config_pixel_difference_threshold_sensitivity = SelectField(
|
||||
_l('Pixel Difference Sensitivity'),
|
||||
choices=[
|
||||
('', _l('Use global default'))
|
||||
] + SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS,
|
||||
validators=[validators.Optional()],
|
||||
default=''
|
||||
)
|
||||
|
||||
# Processor-specific config fields (stored in separate JSON file)
|
||||
processor_config_bounding_box = StringField(
|
||||
_l('Bounding Box'),
|
||||
validators=[
|
||||
validators.Optional(),
|
||||
validators.Length(max=100, message=_l('Bounding box value is too long')),
|
||||
validate_bounding_box
|
||||
],
|
||||
render_kw={"style": "display: none;", "id": "bounding_box"}
|
||||
)
|
||||
|
||||
processor_config_selection_mode = StringField(
|
||||
_l('Selection Mode'),
|
||||
validators=[
|
||||
validators.Optional(),
|
||||
validators.Length(max=20, message=_l('Selection mode value is too long')),
|
||||
validate_selection_mode
|
||||
],
|
||||
render_kw={"style": "display: none;", "id": "selection_mode"}
|
||||
)
|
||||
|
||||
def extra_tab_content(self):
|
||||
"""Tab label for processor-specific settings."""
|
||||
return _l('Screenshot Comparison')
|
||||
|
||||
def extra_form_content(self):
|
||||
"""Render processor-specific form fields.
|
||||
@NOTE: prepend processor_config_* to the field name so it will save into its own datadir/uuid/image_ssim_diff.json and be read at process time
|
||||
"""
|
||||
return '''
|
||||
{% from '_helpers.html' import render_field %}
|
||||
<fieldset>
|
||||
<legend>Screenshot Comparison Settings</legend>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.processor_config_min_change_percentage) }}
|
||||
<span class="pure-form-message-inline">
|
||||
<strong>What percentage of pixels must change to trigger a detection?</strong><br>
|
||||
For example, <strong>0.1%</strong> means if 0.1% or more of the pixels change, it counts as a change.<br>
|
||||
Lower values = more sensitive (detect smaller changes).<br>
|
||||
Higher values = less sensitive (only detect larger changes).<br>
|
||||
Leave blank to use global default (0.1%).
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.processor_config_pixel_difference_threshold_sensitivity) }}
|
||||
<span class="pure-form-message-inline">
|
||||
<strong>How different must an individual pixel be to count as "changed"?</strong><br>
|
||||
<strong>Low sensitivity (75)</strong> = Only count pixels that changed significantly (0-255 scale).<br>
|
||||
<strong>High sensitivity (20)</strong> = Count pixels with small changes as different.<br>
|
||||
<strong>Very high (0)</strong> = Any pixel change counts.<br>
|
||||
Select "Use global default" to inherit the system-wide setting.
|
||||
</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
'''
|
||||
@@ -0,0 +1,242 @@
|
||||
"""
|
||||
Abstract base class for image processing operations.
|
||||
|
||||
All image operations for the image_ssim_diff processor must be implemented
|
||||
through this interface to allow different backends (libvips, OpenCV, PIL, etc.).
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple, Optional, Any
|
||||
|
||||
|
||||
class ImageDiffHandler(ABC):
|
||||
"""
|
||||
Abstract base class for image processing operations.
|
||||
|
||||
Implementations must handle all image operations needed for screenshot
|
||||
comparison including loading, cropping, resizing, diffing, and overlays.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def load_from_bytes(self, img_bytes: bytes) -> Any:
|
||||
"""
|
||||
Load image from bytes.
|
||||
|
||||
Args:
|
||||
img_bytes: Image data as bytes (PNG, JPEG, etc.)
|
||||
|
||||
Returns:
|
||||
Handler-specific image object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def save_to_bytes(self, img: Any, format: str = 'png', quality: int = 85) -> bytes:
|
||||
"""
|
||||
Save image to bytes.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object
|
||||
format: Output format ('png' or 'jpeg')
|
||||
quality: Quality for JPEG (1-100)
|
||||
|
||||
Returns:
|
||||
Image data as bytes
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def crop(self, img: Any, left: int, top: int, right: int, bottom: int) -> Any:
|
||||
"""
|
||||
Crop image to specified region.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object
|
||||
left: Left coordinate
|
||||
top: Top coordinate
|
||||
right: Right coordinate
|
||||
bottom: Bottom coordinate
|
||||
|
||||
Returns:
|
||||
Cropped image object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def resize(self, img: Any, max_width: int, max_height: int) -> Any:
|
||||
"""
|
||||
Resize image maintaining aspect ratio.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object
|
||||
max_width: Maximum width in pixels
|
||||
max_height: Maximum height in pixels
|
||||
|
||||
Returns:
|
||||
Resized image object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_dimensions(self, img: Any) -> Tuple[int, int]:
|
||||
"""
|
||||
Get image dimensions.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object
|
||||
|
||||
Returns:
|
||||
Tuple of (width, height)
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def to_grayscale(self, img: Any) -> Any:
|
||||
"""
|
||||
Convert image to grayscale.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object
|
||||
|
||||
Returns:
|
||||
Grayscale image object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def gaussian_blur(self, img: Any, sigma: float) -> Any:
|
||||
"""
|
||||
Apply Gaussian blur to image.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object
|
||||
sigma: Blur sigma value (0 = no blur)
|
||||
|
||||
Returns:
|
||||
Blurred image object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def absolute_difference(self, img1: Any, img2: Any) -> Any:
|
||||
"""
|
||||
Calculate absolute difference between two images.
|
||||
|
||||
Args:
|
||||
img1: First image (handler-specific object)
|
||||
img2: Second image (handler-specific object)
|
||||
|
||||
Returns:
|
||||
Difference image object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def threshold(self, img: Any, threshold_value: int) -> Tuple[float, Any]:
|
||||
"""
|
||||
Apply threshold to image and calculate change percentage.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object (typically grayscale difference)
|
||||
threshold_value: Threshold value (0-255)
|
||||
|
||||
Returns:
|
||||
Tuple of (change_percentage, binary_mask)
|
||||
- change_percentage: Percentage of pixels above threshold (0-100)
|
||||
- binary_mask: Handler-specific binary mask object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def apply_red_overlay(self, img: Any, mask: Any) -> bytes:
|
||||
"""
|
||||
Apply red overlay to image where mask is True.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object (color)
|
||||
mask: Handler-specific binary mask object
|
||||
|
||||
Returns:
|
||||
JPEG bytes with red overlay applied
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def close(self, img: Any) -> None:
|
||||
"""
|
||||
Clean up image resources if needed.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def find_template(
|
||||
self,
|
||||
img: Any,
|
||||
template_img: Any,
|
||||
original_bbox: Tuple[int, int, int, int],
|
||||
search_tolerance: float = 0.2
|
||||
) -> Optional[Tuple[int, int, int, int]]:
|
||||
"""
|
||||
Find template in image using template matching.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object to search in
|
||||
template_img: Handler-specific template image object to find
|
||||
original_bbox: Original bounding box (left, top, right, bottom)
|
||||
search_tolerance: How far to search (0.2 = ±20% of region size)
|
||||
|
||||
Returns:
|
||||
New bounding box (left, top, right, bottom) or None if not found
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def save_template(
|
||||
self,
|
||||
img: Any,
|
||||
bbox: Tuple[int, int, int, int],
|
||||
output_path: str
|
||||
) -> bool:
|
||||
"""
|
||||
Save a cropped region as a template file.
|
||||
|
||||
Args:
|
||||
img: Handler-specific image object
|
||||
bbox: Bounding box to crop (left, top, right, bottom)
|
||||
output_path: Where to save the template PNG
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def draw_bounding_box(
|
||||
self,
|
||||
img_bytes: bytes,
|
||||
x: int,
|
||||
y: int,
|
||||
width: int,
|
||||
height: int,
|
||||
color: Tuple[int, int, int] = (255, 0, 0),
|
||||
thickness: int = 3
|
||||
) -> bytes:
|
||||
"""
|
||||
Draw a bounding box rectangle on image.
|
||||
|
||||
Args:
|
||||
img_bytes: Image data as bytes
|
||||
x: Left coordinate
|
||||
y: Top coordinate
|
||||
width: Box width
|
||||
height: Box height
|
||||
color: BGR color tuple (default: blue)
|
||||
thickness: Line thickness in pixels
|
||||
|
||||
Returns:
|
||||
Image bytes with bounding box drawn
|
||||
"""
|
||||
pass
|
||||
@@ -0,0 +1,353 @@
|
||||
"""
|
||||
Subprocess-isolated image operations for memory leak prevention.
|
||||
|
||||
LibVIPS accumulates C-level memory in long-running processes that cannot be
|
||||
reclaimed by Python's GC or libvips cache management. Using subprocess isolation
|
||||
ensures complete memory cleanup when the process exits.
|
||||
|
||||
This module wraps LibvipsImageDiffHandler operations in multiprocessing for
|
||||
complete memory isolation without code duplication.
|
||||
|
||||
Research: https://github.com/libvips/pyvips/issues/234
|
||||
"""
|
||||
|
||||
import multiprocessing
|
||||
|
||||
# CRITICAL: Use 'spawn' instead of 'fork' to avoid inheriting parent's
|
||||
# LibVIPS threading state which can cause hangs in gaussblur operations
|
||||
# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
|
||||
try:
|
||||
multiprocessing.set_start_method('spawn', force=False)
|
||||
except RuntimeError:
|
||||
# Already set, ignore
|
||||
pass
|
||||
|
||||
|
||||
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Worker: Generate diff visualization using LibvipsImageDiffHandler in isolated subprocess.
|
||||
|
||||
This runs in a separate process for complete memory isolation.
|
||||
Uses print() instead of loguru to avoid forking issues.
|
||||
"""
|
||||
try:
|
||||
# Import handler inside worker
|
||||
from .libvips_handler import LibvipsImageDiffHandler
|
||||
|
||||
print(f"[Worker] Initializing handler", flush=True)
|
||||
handler = LibvipsImageDiffHandler()
|
||||
|
||||
# Load images using handler
|
||||
img_from = handler.load_from_bytes(img_bytes_from)
|
||||
img_to = handler.load_from_bytes(img_bytes_to)
|
||||
|
||||
# Ensure same size
|
||||
w1, h1 = handler.get_dimensions(img_from)
|
||||
w2, h2 = handler.get_dimensions(img_to)
|
||||
if (w1, h1) != (w2, h2):
|
||||
img_from = handler.resize(img_from, w2, h2)
|
||||
|
||||
# Downscale for faster diff visualization
|
||||
img_from = handler.resize(img_from, max_width, max_height)
|
||||
img_to = handler.resize(img_to, max_width, max_height)
|
||||
|
||||
# Convert to grayscale
|
||||
gray_from = handler.to_grayscale(img_from)
|
||||
gray_to = handler.to_grayscale(img_to)
|
||||
|
||||
# Optional blur - DISABLED due to LibVIPS threading issues in fork
|
||||
# gray_from = handler.gaussian_blur(gray_from, blur_sigma)
|
||||
# gray_to = handler.gaussian_blur(gray_to, blur_sigma)
|
||||
|
||||
# Calculate difference
|
||||
diff = handler.absolute_difference(gray_from, gray_to)
|
||||
|
||||
# Threshold to get mask
|
||||
_, diff_mask = handler.threshold(diff, int(threshold))
|
||||
|
||||
# Generate diff image with red overlay
|
||||
diff_image_bytes = handler.apply_red_overlay(img_to, diff_mask)
|
||||
|
||||
print(f"[Worker] Generated diff ({len(diff_image_bytes)} bytes)", flush=True)
|
||||
conn.send(diff_image_bytes)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Worker] Error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
conn.send(None)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def generate_diff_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Generate diff visualization in isolated subprocess for memory leak prevention.
|
||||
|
||||
Args:
|
||||
img_bytes_from: Previous screenshot bytes
|
||||
img_bytes_to: Current screenshot bytes
|
||||
threshold: Pixel difference threshold
|
||||
blur_sigma: Gaussian blur sigma
|
||||
max_width: Maximum width for diff
|
||||
max_height: Maximum height for diff
|
||||
|
||||
Returns:
|
||||
bytes: JPEG diff image or None on failure
|
||||
"""
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
|
||||
p = multiprocessing.Process(
|
||||
target=_worker_generate_diff,
|
||||
args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height)
|
||||
)
|
||||
p.start()
|
||||
|
||||
result = None
|
||||
try:
|
||||
# Wait for result (30 second timeout)
|
||||
if parent_conn.poll(30):
|
||||
result = parent_conn.recv()
|
||||
except Exception as e:
|
||||
print(f"[Parent] Error receiving result: {e}", flush=True)
|
||||
finally:
|
||||
# Always close pipe first
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Try graceful shutdown
|
||||
p.join(timeout=5)
|
||||
if p.is_alive():
|
||||
print("[Parent] Process didn't exit gracefully, terminating", flush=True)
|
||||
p.terminate()
|
||||
p.join(timeout=3)
|
||||
|
||||
# Force kill if still alive
|
||||
if p.is_alive():
|
||||
print("[Parent] Process didn't terminate, killing", flush=True)
|
||||
p.kill()
|
||||
p.join(timeout=1)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Calculate change percentage in isolated subprocess using handler.
|
||||
|
||||
Returns:
|
||||
float: Change percentage
|
||||
"""
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
|
||||
def _worker_calculate(conn):
|
||||
try:
|
||||
# Import handler inside worker
|
||||
from .libvips_handler import LibvipsImageDiffHandler
|
||||
|
||||
handler = LibvipsImageDiffHandler()
|
||||
|
||||
# Load images
|
||||
img_from = handler.load_from_bytes(img_bytes_from)
|
||||
img_to = handler.load_from_bytes(img_bytes_to)
|
||||
|
||||
# Ensure same size
|
||||
w1, h1 = handler.get_dimensions(img_from)
|
||||
w2, h2 = handler.get_dimensions(img_to)
|
||||
if (w1, h1) != (w2, h2):
|
||||
img_from = handler.resize(img_from, w2, h2)
|
||||
|
||||
# Downscale
|
||||
img_from = handler.resize(img_from, max_width, max_height)
|
||||
img_to = handler.resize(img_to, max_width, max_height)
|
||||
|
||||
# Convert to grayscale
|
||||
gray_from = handler.to_grayscale(img_from)
|
||||
gray_to = handler.to_grayscale(img_to)
|
||||
|
||||
# Optional blur
|
||||
gray_from = handler.gaussian_blur(gray_from, blur_sigma)
|
||||
gray_to = handler.gaussian_blur(gray_to, blur_sigma)
|
||||
|
||||
# Calculate difference
|
||||
diff = handler.absolute_difference(gray_from, gray_to)
|
||||
|
||||
# Threshold and get percentage
|
||||
change_percentage, _ = handler.threshold(diff, int(threshold))
|
||||
|
||||
conn.send(float(change_percentage))
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Worker] Calculate error: {e}", flush=True)
|
||||
conn.send(0.0)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
p = multiprocessing.Process(target=_worker_calculate, args=(child_conn,))
|
||||
p.start()
|
||||
|
||||
result = 0.0
|
||||
try:
|
||||
if parent_conn.poll(30):
|
||||
result = parent_conn.recv()
|
||||
except Exception as e:
|
||||
print(f"[Parent] Calculate error receiving result: {e}", flush=True)
|
||||
finally:
|
||||
# Always close pipe first
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Try graceful shutdown
|
||||
p.join(timeout=5)
|
||||
if p.is_alive():
|
||||
print("[Parent] Calculate process didn't exit gracefully, terminating", flush=True)
|
||||
p.terminate()
|
||||
p.join(timeout=3)
|
||||
|
||||
# Force kill if still alive
|
||||
if p.is_alive():
|
||||
print("[Parent] Calculate process didn't terminate, killing", flush=True)
|
||||
p.kill()
|
||||
p.join(timeout=1)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def compare_images_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, min_change_percentage, crop_region=None):
|
||||
"""
|
||||
Compare images in isolated subprocess for change detection.
|
||||
|
||||
Args:
|
||||
img_bytes_from: Previous screenshot bytes
|
||||
img_bytes_to: Current screenshot bytes
|
||||
threshold: Pixel difference threshold
|
||||
blur_sigma: Gaussian blur sigma
|
||||
min_change_percentage: Minimum percentage to trigger change detection
|
||||
crop_region: Optional tuple (left, top, right, bottom) for cropping both images
|
||||
|
||||
Returns:
|
||||
tuple: (changed_detected, change_percentage)
|
||||
"""
|
||||
print(f"[Parent] Starting compare_images_isolated subprocess", flush=True)
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
|
||||
def _worker_compare(conn):
|
||||
try:
|
||||
print(f"[Worker] Compare worker starting", flush=True)
|
||||
# Import handler inside worker
|
||||
from .libvips_handler import LibvipsImageDiffHandler
|
||||
|
||||
print(f"[Worker] Initializing handler", flush=True)
|
||||
handler = LibvipsImageDiffHandler()
|
||||
|
||||
# Load images
|
||||
print(f"[Worker] Loading images (from={len(img_bytes_from)} bytes, to={len(img_bytes_to)} bytes)", flush=True)
|
||||
img_from = handler.load_from_bytes(img_bytes_from)
|
||||
img_to = handler.load_from_bytes(img_bytes_to)
|
||||
print(f"[Worker] Images loaded", flush=True)
|
||||
|
||||
# Crop if region specified
|
||||
if crop_region:
|
||||
print(f"[Worker] Cropping to region {crop_region}", flush=True)
|
||||
left, top, right, bottom = crop_region
|
||||
img_from = handler.crop(img_from, left, top, right, bottom)
|
||||
img_to = handler.crop(img_to, left, top, right, bottom)
|
||||
print(f"[Worker] Cropping completed", flush=True)
|
||||
|
||||
# Ensure same size
|
||||
w1, h1 = handler.get_dimensions(img_from)
|
||||
w2, h2 = handler.get_dimensions(img_to)
|
||||
print(f"[Worker] Image dimensions: from={w1}x{h1}, to={w2}x{h2}", flush=True)
|
||||
if (w1, h1) != (w2, h2):
|
||||
print(f"[Worker] Resizing to match dimensions", flush=True)
|
||||
img_from = handler.resize(img_from, w2, h2)
|
||||
|
||||
# Convert to grayscale
|
||||
print(f"[Worker] Converting to grayscale", flush=True)
|
||||
gray_from = handler.to_grayscale(img_from)
|
||||
gray_to = handler.to_grayscale(img_to)
|
||||
|
||||
# Optional blur
|
||||
# NOTE: gaussblur can hang in forked subprocesses due to LibVIPS threading
|
||||
# Skip blur as a workaround - sigma=0.8 is subtle and comparison works without it
|
||||
if blur_sigma > 0:
|
||||
print(f"[Worker] Skipping blur (sigma={blur_sigma}) due to LibVIPS threading issues in fork", flush=True)
|
||||
# gray_from = handler.gaussian_blur(gray_from, blur_sigma)
|
||||
# gray_to = handler.gaussian_blur(gray_to, blur_sigma)
|
||||
|
||||
# Calculate difference
|
||||
print(f"[Worker] Calculating difference", flush=True)
|
||||
diff = handler.absolute_difference(gray_from, gray_to)
|
||||
|
||||
# Threshold and get percentage
|
||||
print(f"[Worker] Applying threshold ({threshold})", flush=True)
|
||||
change_percentage, _ = handler.threshold(diff, int(threshold))
|
||||
|
||||
# Determine if change detected
|
||||
changed_detected = change_percentage > min_change_percentage
|
||||
|
||||
print(f"[Worker] Comparison complete: changed={changed_detected}, percentage={change_percentage:.2f}%", flush=True)
|
||||
conn.send((changed_detected, float(change_percentage)))
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Worker] Compare error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
conn.send((False, 0.0))
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
p = multiprocessing.Process(target=_worker_compare, args=(child_conn,))
|
||||
print(f"[Parent] Starting subprocess (pid will be assigned)", flush=True)
|
||||
p.start()
|
||||
print(f"[Parent] Subprocess started (pid={p.pid}), waiting for result (30s timeout)", flush=True)
|
||||
|
||||
result = (False, 0.0)
|
||||
try:
|
||||
if parent_conn.poll(30):
|
||||
print(f"[Parent] Result available, receiving", flush=True)
|
||||
result = parent_conn.recv()
|
||||
print(f"[Parent] Result received: {result}", flush=True)
|
||||
else:
|
||||
print(f"[Parent] Timeout waiting for result after 30s", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Parent] Compare error receiving result: {e}", flush=True)
|
||||
finally:
|
||||
# Always close pipe first
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Try graceful shutdown
|
||||
import time
|
||||
print(f"[Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
|
||||
join_start = time.time()
|
||||
p.join(timeout=5)
|
||||
join_elapsed = time.time() - join_start
|
||||
print(f"[Parent] First join took {join_elapsed:.2f}s", flush=True)
|
||||
|
||||
if p.is_alive():
|
||||
print("[Parent] Compare process didn't exit gracefully, terminating", flush=True)
|
||||
term_start = time.time()
|
||||
p.terminate()
|
||||
p.join(timeout=3)
|
||||
term_elapsed = time.time() - term_start
|
||||
print(f"[Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
|
||||
|
||||
# Force kill if still alive
|
||||
if p.is_alive():
|
||||
print("[Parent] Compare process didn't terminate, killing", flush=True)
|
||||
kill_start = time.time()
|
||||
p.kill()
|
||||
p.join(timeout=1)
|
||||
kill_elapsed = time.time() - kill_start
|
||||
print(f"[Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
|
||||
|
||||
print(f"[Parent] Subprocess cleanup complete, returning result", flush=True)
|
||||
|
||||
return result
|
||||
@@ -0,0 +1,627 @@
|
||||
"""
|
||||
OpenCV-based subprocess isolation for image comparison.
|
||||
|
||||
OpenCV is much more stable in multiprocessing contexts than LibVIPS.
|
||||
No threading issues, no fork problems, picklable functions.
|
||||
"""
|
||||
|
||||
import multiprocessing
|
||||
import numpy as np
|
||||
from .. import POLL_TIMEOUT_ABSOLUTE
|
||||
|
||||
# Public implementation name for logging
|
||||
IMPLEMENTATION_NAME = "OpenCV"
|
||||
|
||||
|
||||
def _worker_compare(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region):
|
||||
"""
|
||||
Worker function for image comparison (must be top-level for pickling with spawn).
|
||||
|
||||
Args:
|
||||
conn: Pipe connection for sending results
|
||||
img_bytes_from: Previous screenshot bytes
|
||||
img_bytes_to: Current screenshot bytes
|
||||
pixel_difference_threshold: Pixel-level sensitivity (0-255) - how different must a pixel be to count as changed
|
||||
blur_sigma: Gaussian blur sigma
|
||||
crop_region: Optional (left, top, right, bottom) crop coordinates
|
||||
"""
|
||||
import time
|
||||
try:
|
||||
import cv2
|
||||
|
||||
# CRITICAL: Disable OpenCV threading to prevent thread explosion
|
||||
# With multiprocessing, each subprocess would otherwise spawn threads equal to CPU cores
|
||||
# This causes excessive thread counts and memory overhead
|
||||
# Research: https://medium.com/@rachittayal7/a-note-on-opencv-threads-performance-in-prod-d10180716fba
|
||||
cv2.setNumThreads(1)
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Compare worker starting (threads=1 for memory optimization)", flush=True)
|
||||
|
||||
# Decode images from bytes
|
||||
print(f"[{time.time():.3f}] [Worker] Loading images (from={len(img_bytes_from)} bytes, to={len(img_bytes_to)} bytes)", flush=True)
|
||||
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
|
||||
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
|
||||
|
||||
# Check if decoding succeeded
|
||||
if img_from is None:
|
||||
raise ValueError("Failed to decode 'from' image - may be corrupt or unsupported format")
|
||||
if img_to is None:
|
||||
raise ValueError("Failed to decode 'to' image - may be corrupt or unsupported format")
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Images loaded: from={img_from.shape}, to={img_to.shape}", flush=True)
|
||||
|
||||
# Crop if region specified
|
||||
if crop_region:
|
||||
print(f"[{time.time():.3f}] [Worker] Cropping to region {crop_region}", flush=True)
|
||||
left, top, right, bottom = crop_region
|
||||
img_from = img_from[top:bottom, left:right]
|
||||
img_to = img_to[top:bottom, left:right]
|
||||
print(f"[{time.time():.3f}] [Worker] Cropped: from={img_from.shape}, to={img_to.shape}", flush=True)
|
||||
|
||||
# Resize if dimensions don't match
|
||||
if img_from.shape != img_to.shape:
|
||||
print(f"[{time.time():.3f}] [Worker] Resizing to match dimensions", flush=True)
|
||||
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
|
||||
|
||||
# Convert to grayscale
|
||||
print(f"[{time.time():.3f}] [Worker] Converting to grayscale", flush=True)
|
||||
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
|
||||
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# Optional Gaussian blur
|
||||
if blur_sigma > 0:
|
||||
print(f"[{time.time():.3f}] [Worker] Applying Gaussian blur (sigma={blur_sigma})", flush=True)
|
||||
# OpenCV uses kernel size, convert sigma to kernel size: size = 2 * round(3*sigma) + 1
|
||||
ksize = int(2 * round(3 * blur_sigma)) + 1
|
||||
if ksize % 2 == 0: # Must be odd
|
||||
ksize += 1
|
||||
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
|
||||
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
|
||||
print(f"[{time.time():.3f}] [Worker] Blur applied (kernel={ksize}x{ksize})", flush=True)
|
||||
|
||||
# Calculate absolute difference
|
||||
print(f"[{time.time():.3f}] [Worker] Calculating absolute difference", flush=True)
|
||||
diff = cv2.absdiff(gray_from, gray_to)
|
||||
|
||||
# Apply threshold
|
||||
print(f"[{time.time():.3f}] [Worker] Applying pixel difference threshold ({pixel_difference_threshold})", flush=True)
|
||||
_, thresholded = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Calculate change percentage
|
||||
total_pixels = thresholded.size
|
||||
changed_pixels = np.count_nonzero(thresholded)
|
||||
change_percentage = (changed_pixels / total_pixels) * 100.0
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Comparison complete: percentage={change_percentage:.2f}%", flush=True)
|
||||
# Return only the score - let the caller decide if it's a "change"
|
||||
conn.send(float(change_percentage))
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Worker] Error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send error info as dict so parent can re-raise
|
||||
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def compare_images_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region=None):
|
||||
"""
|
||||
Compare images in isolated subprocess using OpenCV (async-safe).
|
||||
|
||||
Args:
|
||||
img_bytes_from: Previous screenshot bytes
|
||||
img_bytes_to: Current screenshot bytes
|
||||
pixel_difference_threshold: Pixel-level sensitivity (0-255) - how different must a pixel be to count as changed
|
||||
blur_sigma: Gaussian blur sigma
|
||||
crop_region: Optional (left, top, right, bottom) crop coordinates
|
||||
|
||||
Returns:
|
||||
float: Change percentage (0-100)
|
||||
"""
|
||||
import time
|
||||
import asyncio
|
||||
print(f"[{time.time():.3f}] [Parent] Starting OpenCV comparison subprocess", flush=True)
|
||||
|
||||
# Use spawn method for clean process (no fork issues)
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
p = ctx.Process(
|
||||
target=_worker_compare,
|
||||
args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region)
|
||||
)
|
||||
|
||||
print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
|
||||
p.start()
|
||||
print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)
|
||||
|
||||
result = 0.0
|
||||
try:
|
||||
# Async-friendly polling: check in small intervals without blocking event loop
|
||||
deadline = time.time() + POLL_TIMEOUT_ABSOLUTE
|
||||
while time.time() < deadline:
|
||||
# Run poll() in thread to avoid blocking event loop
|
||||
has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
|
||||
if has_data:
|
||||
print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
|
||||
result = await asyncio.to_thread(parent_conn.recv)
|
||||
# Check if result is an error dict
|
||||
if isinstance(result, dict) and 'error' in result:
|
||||
raise RuntimeError(f"Image comparison failed: {result['error']}")
|
||||
print(f"[{time.time():.3f}] [Parent] Result received: {result:.2f}%", flush=True)
|
||||
break
|
||||
await asyncio.sleep(0) # Yield control to event loop
|
||||
else:
|
||||
from loguru import logger
|
||||
logger.critical(f"[OpenCV subprocess] Timeout waiting for compare_images result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
|
||||
print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
|
||||
raise TimeoutError(f"Image comparison subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Parent] Error receiving result: {e}", flush=True)
|
||||
raise
|
||||
finally:
|
||||
# Always close pipe first
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Try graceful shutdown (async-safe)
|
||||
print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
|
||||
join_start = time.time()
|
||||
await asyncio.to_thread(p.join, 5)
|
||||
join_elapsed = time.time() - join_start
|
||||
print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)
|
||||
|
||||
if p.is_alive():
|
||||
print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
|
||||
term_start = time.time()
|
||||
p.terminate()
|
||||
await asyncio.to_thread(p.join, 3)
|
||||
term_elapsed = time.time() - term_start
|
||||
print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
|
||||
|
||||
# Force kill if still alive
|
||||
if p.is_alive():
|
||||
print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
|
||||
kill_start = time.time()
|
||||
p.kill()
|
||||
await asyncio.to_thread(p.join, 1)
|
||||
kill_elapsed = time.time() - kill_start
|
||||
print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
|
||||
|
||||
print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Worker function for generating visual diff with red overlay.
|
||||
"""
|
||||
import time
|
||||
try:
|
||||
import cv2
|
||||
|
||||
cv2.setNumThreads(1)
|
||||
print(f"[{time.time():.3f}] [Worker] Generate diff worker starting", flush=True)
|
||||
|
||||
# Decode images
|
||||
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
|
||||
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
|
||||
|
||||
# Resize if needed to match dimensions
|
||||
if img_from.shape != img_to.shape:
|
||||
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
|
||||
|
||||
# Downscale to max dimensions for faster processing
|
||||
h, w = img_to.shape[:2]
|
||||
if w > max_width or h > max_height:
|
||||
scale = min(max_width / w, max_height / h)
|
||||
new_w = int(w * scale)
|
||||
new_h = int(h * scale)
|
||||
img_from = cv2.resize(img_from, (new_w, new_h))
|
||||
img_to = cv2.resize(img_to, (new_w, new_h))
|
||||
|
||||
# Convert to grayscale
|
||||
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
|
||||
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# Optional blur
|
||||
if blur_sigma > 0:
|
||||
ksize = int(2 * round(3 * blur_sigma)) + 1
|
||||
if ksize % 2 == 0:
|
||||
ksize += 1
|
||||
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
|
||||
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
|
||||
|
||||
# Calculate difference
|
||||
diff = cv2.absdiff(gray_from, gray_to)
|
||||
|
||||
# Apply threshold to get mask
|
||||
_, mask = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Create red overlay on original 'to' image
|
||||
# Where mask is 255 (changed), blend 50% red
|
||||
overlay = img_to.copy()
|
||||
overlay[:, :, 2] = np.where(mask > 0,
|
||||
np.clip(overlay[:, :, 2] * 0.5 + 127, 0, 255).astype(np.uint8),
|
||||
overlay[:, :, 2])
|
||||
overlay[:, :, 0:2] = np.where(mask[:, :, np.newaxis] > 0,
|
||||
(overlay[:, :, 0:2] * 0.5).astype(np.uint8),
|
||||
overlay[:, :, 0:2])
|
||||
|
||||
# Encode as JPEG
|
||||
_, encoded = cv2.imencode('.jpg', overlay, [cv2.IMWRITE_JPEG_QUALITY, 85])
|
||||
diff_bytes = encoded.tobytes()
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Generated diff ({len(diff_bytes)} bytes)", flush=True)
|
||||
conn.send(diff_bytes)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Worker] Generate diff error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send error info as dict so parent can re-raise
|
||||
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def generate_diff_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Generate visual diff with red overlay in isolated subprocess (async-safe).
|
||||
|
||||
Returns:
|
||||
bytes: JPEG diff image or None on failure
|
||||
"""
|
||||
import time
|
||||
import asyncio
|
||||
print(f"[{time.time():.3f}] [Parent] Starting generate_diff subprocess", flush=True)
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
p = ctx.Process(
|
||||
target=_worker_generate_diff,
|
||||
args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height)
|
||||
)
|
||||
|
||||
print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
|
||||
p.start()
|
||||
print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)
|
||||
|
||||
result = None
|
||||
try:
|
||||
# Async-friendly polling: check in small intervals without blocking event loop
|
||||
deadline = time.time() + POLL_TIMEOUT_ABSOLUTE
|
||||
while time.time() < deadline:
|
||||
# Run poll() in thread to avoid blocking event loop
|
||||
has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
|
||||
if has_data:
|
||||
print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
|
||||
result = await asyncio.to_thread(parent_conn.recv)
|
||||
# Check if result is an error dict
|
||||
if isinstance(result, dict) and 'error' in result:
|
||||
raise RuntimeError(f"Generate diff failed: {result['error']}")
|
||||
print(f"[{time.time():.3f}] [Parent] Result received ({len(result) if result else 0} bytes)", flush=True)
|
||||
break
|
||||
await asyncio.sleep(0) # Yield control to event loop
|
||||
else:
|
||||
from loguru import logger
|
||||
logger.critical(f"[OpenCV subprocess] Timeout waiting for generate_diff result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
|
||||
print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
|
||||
raise TimeoutError(f"Generate diff subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Parent] Error receiving diff: {e}", flush=True)
|
||||
raise
|
||||
finally:
|
||||
# Always close pipe first
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Try graceful shutdown (async-safe)
|
||||
print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
|
||||
join_start = time.time()
|
||||
await asyncio.to_thread(p.join, 5)
|
||||
join_elapsed = time.time() - join_start
|
||||
print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)
|
||||
|
||||
if p.is_alive():
|
||||
print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
|
||||
term_start = time.time()
|
||||
p.terminate()
|
||||
await asyncio.to_thread(p.join, 3)
|
||||
term_elapsed = time.time() - term_start
|
||||
print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
|
||||
|
||||
if p.is_alive():
|
||||
print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
|
||||
kill_start = time.time()
|
||||
p.kill()
|
||||
await asyncio.to_thread(p.join, 1)
|
||||
kill_elapsed = time.time() - kill_start
|
||||
print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
|
||||
|
||||
print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _worker_draw_bounding_box(conn, img_bytes, x, y, width, height, color, thickness):
|
||||
"""
|
||||
Worker function for drawing bounding box on image.
|
||||
"""
|
||||
import time
|
||||
try:
|
||||
import cv2
|
||||
|
||||
cv2.setNumThreads(1)
|
||||
print(f"[{time.time():.3f}] [Worker] Draw bounding box worker starting", flush=True)
|
||||
|
||||
# Decode image
|
||||
img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
|
||||
if img is None:
|
||||
print(f"[{time.time():.3f}] [Worker] Failed to decode image", flush=True)
|
||||
conn.send(None)
|
||||
return
|
||||
|
||||
# Draw rectangle (BGR format)
|
||||
cv2.rectangle(img, (x, y), (x + width, y + height), color, thickness)
|
||||
|
||||
# Encode back to PNG
|
||||
_, encoded = cv2.imencode('.png', img)
|
||||
result_bytes = encoded.tobytes()
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Bounding box drawn ({len(result_bytes)} bytes)", flush=True)
|
||||
conn.send(result_bytes)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Worker] Draw bounding box error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send error info as dict so parent can re-raise
|
||||
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def draw_bounding_box_isolated(img_bytes, x, y, width, height, color=(255, 0, 0), thickness=3):
|
||||
"""
|
||||
Draw bounding box on image in isolated subprocess (async-safe).
|
||||
|
||||
Args:
|
||||
img_bytes: Image data as bytes
|
||||
x: Left coordinate
|
||||
y: Top coordinate
|
||||
width: Box width
|
||||
height: Box height
|
||||
color: BGR color tuple (default: blue)
|
||||
thickness: Line thickness in pixels
|
||||
|
||||
Returns:
|
||||
bytes: PNG image with bounding box or None on failure
|
||||
"""
|
||||
import time
|
||||
import asyncio
|
||||
print(f"[{time.time():.3f}] [Parent] Starting draw_bounding_box subprocess", flush=True)
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
p = ctx.Process(
|
||||
target=_worker_draw_bounding_box,
|
||||
args=(child_conn, img_bytes, x, y, width, height, color, thickness)
|
||||
)
|
||||
|
||||
print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
|
||||
p.start()
|
||||
print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)
|
||||
|
||||
result = None
|
||||
try:
|
||||
# Async-friendly polling: check in small intervals without blocking event loop
|
||||
deadline = time.time() + POLL_TIMEOUT_ABSOLUTE
|
||||
while time.time() < deadline:
|
||||
# Run poll() in thread to avoid blocking event loop
|
||||
has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
|
||||
if has_data:
|
||||
print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
|
||||
# Run recv() in thread too
|
||||
result = await asyncio.to_thread(parent_conn.recv)
|
||||
# Check if result is an error dict
|
||||
if isinstance(result, dict) and 'error' in result:
|
||||
raise RuntimeError(f"Draw bounding box failed: {result['error']}")
|
||||
print(f"[{time.time():.3f}] [Parent] Result received ({len(result) if result else 0} bytes)", flush=True)
|
||||
break
|
||||
# Yield control to event loop
|
||||
await asyncio.sleep(0)
|
||||
else:
|
||||
from loguru import logger
|
||||
logger.critical(f"[OpenCV subprocess] Timeout waiting for draw_bounding_box result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
|
||||
print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
|
||||
raise TimeoutError(f"Draw bounding box subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Parent] Error receiving result: {e}", flush=True)
|
||||
raise
|
||||
finally:
|
||||
# Always close pipe first
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Try graceful shutdown (run join in thread to avoid blocking)
|
||||
print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (3s timeout)", flush=True)
|
||||
join_start = time.time()
|
||||
await asyncio.to_thread(p.join, 3)
|
||||
join_elapsed = time.time() - join_start
|
||||
print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)
|
||||
|
||||
if p.is_alive():
|
||||
print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
|
||||
term_start = time.time()
|
||||
p.terminate()
|
||||
await asyncio.to_thread(p.join, 2)
|
||||
term_elapsed = time.time() - term_start
|
||||
print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
|
||||
|
||||
if p.is_alive():
|
||||
print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
|
||||
kill_start = time.time()
|
||||
p.kill()
|
||||
await asyncio.to_thread(p.join, 1)
|
||||
kill_elapsed = time.time() - kill_start
|
||||
print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
|
||||
|
||||
print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _worker_calculate_percentage(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Worker function for calculating change percentage.
|
||||
"""
|
||||
import time
|
||||
try:
|
||||
import cv2
|
||||
|
||||
cv2.setNumThreads(1)
|
||||
|
||||
# Decode images
|
||||
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
|
||||
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
|
||||
|
||||
# Resize if needed
|
||||
if img_from.shape != img_to.shape:
|
||||
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
|
||||
|
||||
# Downscale to max dimensions
|
||||
h, w = img_to.shape[:2]
|
||||
if w > max_width or h > max_height:
|
||||
scale = min(max_width / w, max_height / h)
|
||||
new_w = int(w * scale)
|
||||
new_h = int(h * scale)
|
||||
img_from = cv2.resize(img_from, (new_w, new_h))
|
||||
img_to = cv2.resize(img_to, (new_w, new_h))
|
||||
|
||||
# Convert to grayscale
|
||||
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
|
||||
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# Optional blur
|
||||
if blur_sigma > 0:
|
||||
ksize = int(2 * round(3 * blur_sigma)) + 1
|
||||
if ksize % 2 == 0:
|
||||
ksize += 1
|
||||
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
|
||||
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
|
||||
|
||||
# Calculate difference
|
||||
diff = cv2.absdiff(gray_from, gray_to)
|
||||
|
||||
# Apply threshold
|
||||
_, thresholded = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Calculate percentage
|
||||
total_pixels = thresholded.size
|
||||
changed_pixels = np.count_nonzero(thresholded)
|
||||
change_percentage = (changed_pixels / total_pixels) * 100.0
|
||||
|
||||
conn.send(float(change_percentage))
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Worker] Calculate percentage error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send error info as dict so parent can re-raise
|
||||
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Calculate change percentage in isolated subprocess (async-safe).
|
||||
|
||||
Returns:
|
||||
float: Change percentage
|
||||
"""
|
||||
import time
|
||||
import asyncio
|
||||
print(f"[{time.time():.3f}] [Parent] Starting calculate_percentage subprocess", flush=True)
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
p = ctx.Process(
|
||||
target=_worker_calculate_percentage,
|
||||
args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height)
|
||||
)
|
||||
|
||||
print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
|
||||
p.start()
|
||||
print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)
|
||||
|
||||
result = 0.0
|
||||
try:
|
||||
# Async-friendly polling: check in small intervals without blocking event loop
|
||||
deadline = time.time() + POLL_TIMEOUT_ABSOLUTE
|
||||
while time.time() < deadline:
|
||||
# Run poll() in thread to avoid blocking event loop
|
||||
has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
|
||||
if has_data:
|
||||
print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
|
||||
result = await asyncio.to_thread(parent_conn.recv)
|
||||
# Check if result is an error dict
|
||||
if isinstance(result, dict) and 'error' in result:
|
||||
raise RuntimeError(f"Calculate change percentage failed: {result['error']}")
|
||||
print(f"[{time.time():.3f}] [Parent] Result received: {result:.2f}%", flush=True)
|
||||
break
|
||||
await asyncio.sleep(0) # Yield control to event loop
|
||||
else:
|
||||
from loguru import logger
|
||||
logger.critical(f"[OpenCV subprocess] Timeout waiting for calculate_change_percentage result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
|
||||
print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
|
||||
raise TimeoutError(f"Calculate change percentage subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Parent] Error receiving percentage: {e}", flush=True)
|
||||
raise
|
||||
finally:
|
||||
# Always close pipe first
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Try graceful shutdown (async-safe)
|
||||
print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
|
||||
join_start = time.time()
|
||||
await asyncio.to_thread(p.join, 5)
|
||||
join_elapsed = time.time() - join_start
|
||||
print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)
|
||||
|
||||
if p.is_alive():
|
||||
print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
|
||||
term_start = time.time()
|
||||
p.terminate()
|
||||
await asyncio.to_thread(p.join, 3)
|
||||
term_elapsed = time.time() - term_start
|
||||
print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
|
||||
|
||||
if p.is_alive():
|
||||
print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
|
||||
kill_start = time.time()
|
||||
p.kill()
|
||||
await asyncio.to_thread(p.join, 1)
|
||||
kill_elapsed = time.time() - kill_start
|
||||
print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
|
||||
|
||||
print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)
|
||||
|
||||
return result
|
||||
@@ -0,0 +1,340 @@
|
||||
"""
|
||||
LibVIPS implementation of ImageDiffHandler.
|
||||
|
||||
Uses pyvips for high-performance image processing with streaming architecture
|
||||
and low memory footprint. Ideal for large screenshots (8000px+).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import os
|
||||
from typing import Tuple, Any, TYPE_CHECKING
|
||||
from loguru import logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pyvips
|
||||
|
||||
try:
|
||||
import pyvips
|
||||
PYVIPS_AVAILABLE = True
|
||||
except ImportError:
|
||||
PYVIPS_AVAILABLE = False
|
||||
logger.warning("pyvips not available - install with: pip install pyvips")
|
||||
|
||||
from . import ImageDiffHandler
|
||||
|
||||
|
||||
class LibvipsImageDiffHandler(ImageDiffHandler):
|
||||
"""
|
||||
LibVIPS implementation using streaming architecture.
|
||||
|
||||
Benefits:
|
||||
- 3x faster than ImageMagick
|
||||
- 5x less memory than PIL
|
||||
- Automatic multi-threading
|
||||
- Streaming - processes images in chunks
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
if not PYVIPS_AVAILABLE:
|
||||
raise ImportError("pyvips is not installed. Install with: pip install pyvips")
|
||||
|
||||
def load_from_bytes(self, img_bytes: bytes) -> pyvips.Image:
|
||||
"""Load image from bytes using libvips streaming."""
|
||||
return pyvips.Image.new_from_buffer(img_bytes, '')
|
||||
|
||||
def save_to_bytes(self, img: pyvips.Image, format: str = 'png', quality: int = 85) -> bytes:
|
||||
"""
|
||||
Save image to bytes using temp file.
|
||||
|
||||
Note: Uses temp file instead of write_to_buffer() to avoid C memory leak.
|
||||
See: https://github.com/libvips/pyvips/issues/234
|
||||
"""
|
||||
import tempfile
|
||||
|
||||
format = format.lower()
|
||||
|
||||
try:
|
||||
if format == 'png':
|
||||
suffix = '.png'
|
||||
write_args = {'compression': 6}
|
||||
elif format in ['jpg', 'jpeg']:
|
||||
suffix = '.jpg'
|
||||
write_args = {'Q': quality}
|
||||
else:
|
||||
raise ValueError(f"Unsupported format: {format}")
|
||||
|
||||
# Use temp file to avoid write_to_buffer() memory leak
|
||||
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
|
||||
temp_path = tmp.name
|
||||
|
||||
# Write to file
|
||||
img.write_to_file(temp_path, **write_args)
|
||||
|
||||
# Read bytes and clean up
|
||||
with open(temp_path, 'rb') as f:
|
||||
image_bytes = f.read()
|
||||
|
||||
os.unlink(temp_path)
|
||||
return image_bytes
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save via temp file: {e}")
|
||||
# Fallback to write_to_buffer if temp file fails
|
||||
if format == 'png':
|
||||
return img.write_to_buffer('.png', compression=6)
|
||||
else:
|
||||
return img.write_to_buffer('.jpg', Q=quality)
|
||||
|
||||
def crop(self, img: pyvips.Image, left: int, top: int, right: int, bottom: int) -> pyvips.Image:
|
||||
"""Crop image using libvips."""
|
||||
width = right - left
|
||||
height = bottom - top
|
||||
return img.crop(left, top, width, height)
|
||||
|
||||
def resize(self, img: pyvips.Image, max_width: int, max_height: int) -> pyvips.Image:
|
||||
"""
|
||||
Resize image maintaining aspect ratio.
|
||||
|
||||
Uses thumbnail_image for efficient downscaling with streaming.
|
||||
"""
|
||||
width, height = img.width, img.height
|
||||
|
||||
if width <= max_width and height <= max_height:
|
||||
return img
|
||||
|
||||
# Calculate scaling to fit within max dimensions
|
||||
width_ratio = max_width / width if width > max_width else 1.0
|
||||
height_ratio = max_height / height if height > max_height else 1.0
|
||||
ratio = min(width_ratio, height_ratio)
|
||||
|
||||
new_width = int(width * ratio)
|
||||
new_height = int(height * ratio)
|
||||
|
||||
logger.debug(f"Resizing image: {width}x{height} -> {new_width}x{new_height}")
|
||||
|
||||
# thumbnail_image is faster than resize for downscaling
|
||||
return img.thumbnail_image(new_width, height=new_height)
|
||||
|
||||
def get_dimensions(self, img: pyvips.Image) -> Tuple[int, int]:
|
||||
"""Get image dimensions."""
|
||||
return (img.width, img.height)
|
||||
|
||||
def to_grayscale(self, img: pyvips.Image) -> pyvips.Image:
|
||||
"""Convert to grayscale using 'b-w' colorspace."""
|
||||
return img.colourspace('b-w')
|
||||
|
||||
def gaussian_blur(self, img: pyvips.Image, sigma: float) -> pyvips.Image:
|
||||
"""Apply Gaussian blur."""
|
||||
if sigma > 0:
|
||||
return img.gaussblur(sigma)
|
||||
return img
|
||||
|
||||
def absolute_difference(self, img1: pyvips.Image, img2: pyvips.Image) -> pyvips.Image:
|
||||
"""
|
||||
Calculate absolute difference using operator overloading.
|
||||
|
||||
LibVIPS supports arithmetic operations between images.
|
||||
"""
|
||||
return (img1 - img2).abs()
|
||||
|
||||
def threshold(self, img: pyvips.Image, threshold_value: int) -> Tuple[float, pyvips.Image]:
|
||||
"""
|
||||
Apply threshold and calculate change percentage.
|
||||
|
||||
Uses ifthenelse for efficient thresholding.
|
||||
"""
|
||||
# Create binary mask: pixels above threshold = 255, others = 0
|
||||
mask = (img > threshold_value).ifthenelse(255, 0)
|
||||
|
||||
# Calculate percentage by averaging mask values
|
||||
# avg() returns mean pixel value (0-255)
|
||||
# Divide by 255 to get proportion, multiply by 100 for percentage
|
||||
mean_value = mask.avg()
|
||||
change_percentage = (mean_value / 255.0) * 100.0
|
||||
|
||||
return float(change_percentage), mask
|
||||
|
||||
def apply_red_overlay(self, img: pyvips.Image, mask: pyvips.Image) -> bytes:
|
||||
"""
|
||||
Apply red overlay where mask is True (50% blend).
|
||||
|
||||
Args:
|
||||
img: Color image (will be converted to RGB if needed)
|
||||
mask: Binary mask (255 where changed, 0 elsewhere)
|
||||
|
||||
Returns:
|
||||
JPEG bytes with red overlay
|
||||
"""
|
||||
import tempfile
|
||||
|
||||
# Ensure RGB colorspace
|
||||
if img.bands == 1:
|
||||
img = img.colourspace('srgb')
|
||||
|
||||
# Normalize mask to 0-1 range for blending
|
||||
mask_normalized = mask / 255.0
|
||||
|
||||
# Split into R, G, B channels
|
||||
channels = img.bandsplit()
|
||||
r, g, b = channels[0], channels[1], channels[2]
|
||||
|
||||
# Apply red overlay (50% blend):
|
||||
# Where mask is 1: blend 50% original with 50% red (255)
|
||||
# Where mask is 0: keep original
|
||||
r = r * (1 - mask_normalized * 0.5) + 127.5 * mask_normalized
|
||||
g = g * (1 - mask_normalized * 0.5)
|
||||
b = b * (1 - mask_normalized * 0.5)
|
||||
|
||||
# Recombine channels
|
||||
result = r.bandjoin([g, b])
|
||||
|
||||
# CRITICAL: Use temp file instead of write_to_buffer()
|
||||
# write_to_buffer() leaks C memory that isn't returned to OS
|
||||
# See: https://github.com/libvips/pyvips/issues/234
|
||||
try:
|
||||
with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp:
|
||||
temp_path = tmp.name
|
||||
|
||||
# Write to file (doesn't leak like write_to_buffer)
|
||||
result.write_to_file(temp_path, Q=85)
|
||||
|
||||
# Read bytes and clean up
|
||||
with open(temp_path, 'rb') as f:
|
||||
image_bytes = f.read()
|
||||
|
||||
os.unlink(temp_path)
|
||||
return image_bytes
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to write image via temp file: {e}")
|
||||
# Fallback to write_to_buffer if temp file fails
|
||||
return result.write_to_buffer('.jpg', Q=85)
|
||||
|
||||
def close(self, img: pyvips.Image) -> None:
|
||||
"""
|
||||
LibVIPS uses automatic reference counting.
|
||||
|
||||
No explicit cleanup needed - memory freed when references drop to zero.
|
||||
"""
|
||||
pass
|
||||
|
||||
def find_template(
|
||||
self,
|
||||
img: pyvips.Image,
|
||||
template_img: pyvips.Image,
|
||||
original_bbox: Tuple[int, int, int, int],
|
||||
search_tolerance: float = 0.2
|
||||
) -> Tuple[int, int, int, int]:
|
||||
"""
|
||||
Find template in image using OpenCV template matching.
|
||||
|
||||
Note: This temporarily converts to numpy for OpenCV operations since
|
||||
libvips doesn't have template matching built-in.
|
||||
"""
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
left, top, right, bottom = original_bbox
|
||||
width = right - left
|
||||
height = bottom - top
|
||||
|
||||
# Calculate search region
|
||||
margin_x = int(width * search_tolerance)
|
||||
margin_y = int(height * search_tolerance)
|
||||
|
||||
search_left = max(0, left - margin_x)
|
||||
search_top = max(0, top - margin_y)
|
||||
search_right = min(img.width, right + margin_x)
|
||||
search_bottom = min(img.height, bottom + margin_y)
|
||||
|
||||
# Crop search region
|
||||
search_region = self.crop(img, search_left, search_top, search_right, search_bottom)
|
||||
|
||||
# Convert to numpy arrays for OpenCV
|
||||
search_array = np.ndarray(
|
||||
buffer=search_region.write_to_memory(),
|
||||
dtype=np.uint8,
|
||||
shape=[search_region.height, search_region.width, search_region.bands]
|
||||
)
|
||||
template_array = np.ndarray(
|
||||
buffer=template_img.write_to_memory(),
|
||||
dtype=np.uint8,
|
||||
shape=[template_img.height, template_img.width, template_img.bands]
|
||||
)
|
||||
|
||||
# Convert to grayscale
|
||||
if len(search_array.shape) == 3:
|
||||
search_gray = cv2.cvtColor(search_array, cv2.COLOR_RGB2GRAY)
|
||||
else:
|
||||
search_gray = search_array
|
||||
|
||||
if len(template_array.shape) == 3:
|
||||
template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
|
||||
else:
|
||||
template_gray = template_array
|
||||
|
||||
logger.debug(f"Searching for template in region: ({search_left}, {search_top}) to ({search_right}, {search_bottom})")
|
||||
|
||||
# Perform template matching
|
||||
result = cv2.matchTemplate(search_gray, template_gray, cv2.TM_CCOEFF_NORMED)
|
||||
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
|
||||
|
||||
logger.debug(f"Template matching confidence: {max_val:.2%}")
|
||||
|
||||
# Check if match is good enough (80% confidence threshold)
|
||||
if max_val >= 0.8:
|
||||
# Calculate new bounding box in original image coordinates
|
||||
match_x = search_left + max_loc[0]
|
||||
match_y = search_top + max_loc[1]
|
||||
|
||||
new_bbox = (match_x, match_y, match_x + width, match_y + height)
|
||||
|
||||
# Calculate movement distance
|
||||
move_x = abs(match_x - left)
|
||||
move_y = abs(match_y - top)
|
||||
|
||||
logger.info(f"Template found at ({match_x}, {match_y}), "
|
||||
f"moved {move_x}px horizontally, {move_y}px vertically, "
|
||||
f"confidence: {max_val:.2%}")
|
||||
|
||||
return new_bbox
|
||||
else:
|
||||
logger.warning(f"Template match confidence too low: {max_val:.2%} (need 80%)")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Template matching error: {e}")
|
||||
return None
|
||||
|
||||
def save_template(
|
||||
self,
|
||||
img: pyvips.Image,
|
||||
bbox: Tuple[int, int, int, int],
|
||||
output_path: str
|
||||
) -> bool:
|
||||
"""
|
||||
Save a cropped region as a template file.
|
||||
"""
|
||||
import os
|
||||
|
||||
try:
|
||||
left, top, right, bottom = bbox
|
||||
width = right - left
|
||||
height = bottom - top
|
||||
|
||||
# Ensure output directory exists
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
|
||||
# Crop template region
|
||||
template = self.crop(img, left, top, right, bottom)
|
||||
|
||||
# Save as PNG
|
||||
template.write_to_file(output_path, compression=6)
|
||||
|
||||
logger.info(f"Saved template: {output_path} ({width}x{height}px)")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save template: {e}")
|
||||
return False
|
||||
109
changedetectionio/processors/image_ssim_diff/preview.py
Normal file
109
changedetectionio/processors/image_ssim_diff/preview.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Preview rendering for SSIM screenshot processor.
|
||||
|
||||
Renders images properly in the browser instead of showing raw bytes.
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def get_asset(asset_name, watch, datastore, request):
|
||||
"""
|
||||
Get processor-specific binary assets for preview streaming.
|
||||
|
||||
This function supports serving images as separate HTTP responses instead
|
||||
of embedding them as base64 in the HTML template, solving memory issues
|
||||
with large screenshots.
|
||||
|
||||
Supported assets:
|
||||
- 'screenshot': The screenshot for the specified version
|
||||
|
||||
Args:
|
||||
asset_name: Name of the asset to retrieve ('screenshot')
|
||||
watch: Watch object
|
||||
datastore: Datastore object
|
||||
request: Flask request (for version query param)
|
||||
|
||||
Returns:
|
||||
tuple: (binary_data, content_type, cache_control_header) or None if not found
|
||||
"""
|
||||
if asset_name != 'screenshot':
|
||||
return None
|
||||
|
||||
versions = list(watch.history.keys())
|
||||
if len(versions) == 0:
|
||||
return None
|
||||
|
||||
# Get the version from query string (default: latest)
|
||||
preferred_version = request.args.get('version')
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
timestamp = preferred_version
|
||||
|
||||
try:
|
||||
screenshot_bytes = watch.get_history_snapshot(timestamp=timestamp)
|
||||
|
||||
# Verify we got bytes (should always be bytes for image files)
|
||||
if not isinstance(screenshot_bytes, bytes):
|
||||
logger.error(f"Expected bytes but got {type(screenshot_bytes)} for screenshot at {timestamp}")
|
||||
return None
|
||||
|
||||
# Detect image format using puremagic (same as Watch.py)
|
||||
try:
|
||||
import puremagic
|
||||
detections = puremagic.magic_string(screenshot_bytes[:2048])
|
||||
if detections:
|
||||
mime_type = detections[0].mime_type
|
||||
logger.trace(f"Detected MIME type: {mime_type}")
|
||||
else:
|
||||
mime_type = 'image/png' # Default fallback
|
||||
except Exception as e:
|
||||
logger.warning(f"puremagic detection failed: {e}, using 'image/png' fallback")
|
||||
mime_type = 'image/png'
|
||||
|
||||
return (screenshot_bytes, mime_type, 'public, max-age=10')
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load screenshot for preview asset: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def render(watch, datastore, request, url_for, render_template, flash, redirect):
|
||||
"""
|
||||
Render the preview page for screenshot watches.
|
||||
|
||||
Args:
|
||||
watch: Watch object
|
||||
datastore: Datastore object
|
||||
request: Flask request
|
||||
url_for: Flask url_for function
|
||||
render_template: Flask render_template function
|
||||
flash: Flask flash function
|
||||
redirect: Flask redirect function
|
||||
|
||||
Returns:
|
||||
Rendered template or redirect
|
||||
"""
|
||||
versions = list(watch.history.keys())
|
||||
|
||||
if len(versions) == 0:
|
||||
flash("Preview unavailable - No snapshots captured yet", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Get the version to display (default: latest)
|
||||
preferred_version = request.args.get('version')
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
timestamp = preferred_version
|
||||
|
||||
# Render custom template for image preview
|
||||
# Screenshot is now served via separate /processor-asset/ endpoint instead of base64
|
||||
# This significantly reduces memory usage by not embedding large images in HTML
|
||||
return render_template(
|
||||
'image_ssim_diff/preview.html',
|
||||
watch=watch,
|
||||
uuid=watch.get('uuid'),
|
||||
versions=versions,
|
||||
timestamp=timestamp,
|
||||
current_diff_url=watch['url']
|
||||
)
|
||||
241
changedetectionio/processors/image_ssim_diff/processor.py
Normal file
241
changedetectionio/processors/image_ssim_diff/processor.py
Normal file
@@ -0,0 +1,241 @@
|
||||
"""
|
||||
Core fast screenshot comparison processor.
|
||||
|
||||
Uses OpenCV with subprocess isolation for high-performance, low-memory
|
||||
image processing. All operations run in isolated subprocesses for complete
|
||||
memory cleanup and stability.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
from loguru import logger
|
||||
from changedetectionio.processors.exceptions import ProcessorException
|
||||
from . import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT, PROCESSOR_CONFIG_NAME, OPENCV_BLUR_SIGMA
|
||||
from ..base import difference_detection_processor, SCREENSHOT_FORMAT_PNG
|
||||
|
||||
# All image operations now use OpenCV via isolated_opencv subprocess handler
|
||||
# Template matching temporarily disabled pending OpenCV implementation
|
||||
|
||||
name = 'Visual / Image screenshot change detection'
|
||||
description = 'Compares screenshots using fast OpenCV algorithm, 10-100x faster than SSIM'
|
||||
processor_weight = 2
|
||||
list_badge_text = "Visual"
|
||||
|
||||
class perform_site_check(difference_detection_processor):
|
||||
"""Fast screenshot comparison processor using OpenCV."""
|
||||
|
||||
# Override to use PNG format for better image comparison (JPEG compression creates noise)
|
||||
screenshot_format = SCREENSHOT_FORMAT_PNG
|
||||
|
||||
def run_changedetection(self, watch):
|
||||
"""
|
||||
Perform screenshot comparison using OpenCV subprocess handler.
|
||||
|
||||
Returns:
|
||||
tuple: (changed_detected, update_obj, screenshot_bytes)
|
||||
"""
|
||||
now = time.time()
|
||||
# Get the current screenshot
|
||||
if not self.fetcher.screenshot:
|
||||
raise ProcessorException(
|
||||
message="No screenshot available. Ensure the watch is configured to use a real browser.",
|
||||
url=watch.get('url')
|
||||
)
|
||||
self.screenshot = self.fetcher.screenshot
|
||||
self.xpath_data = self.fetcher.xpath_data
|
||||
|
||||
# Quick MD5 check - skip expensive comparison if images are identical
|
||||
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
|
||||
current_md5 = hashlib.md5(self.screenshot).hexdigest()
|
||||
previous_md5 = watch.get('previous_md5')
|
||||
if previous_md5 and current_md5 == previous_md5:
|
||||
logger.debug(f"UUID: {watch.get('uuid')} - Screenshot MD5 unchanged ({current_md5}), skipping comparison")
|
||||
raise checksumFromPreviousCheckWasTheSame()
|
||||
else:
|
||||
logger.debug(f"UUID: {watch.get('uuid')} - Screenshot MD5 changed")
|
||||
|
||||
|
||||
|
||||
# Check if bounding box is set (for drawn area mode)
|
||||
# Read from processor-specific config JSON file (named after processor)
|
||||
crop_region = None
|
||||
|
||||
processor_config = self.get_extra_watch_config(PROCESSOR_CONFIG_NAME)
|
||||
bounding_box = processor_config.get('bounding_box') if processor_config else None
|
||||
|
||||
|
||||
# Get pixel difference threshold sensitivity (per-watch > global)
|
||||
# This controls how different a pixel must be (0-255 scale) to count as "changed"
|
||||
pixel_difference_threshold_sensitivity = processor_config.get('pixel_difference_threshold_sensitivity')
|
||||
if not pixel_difference_threshold_sensitivity:
|
||||
pixel_difference_threshold_sensitivity = self.datastore.data['settings']['application'].get('pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
|
||||
try:
|
||||
pixel_difference_threshold_sensitivity = int(pixel_difference_threshold_sensitivity)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
|
||||
pixel_difference_threshold_sensitivity = SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
||||
|
||||
|
||||
# Get minimum change percentage (per-watch > global > env var default)
|
||||
# This controls what percentage of pixels must change to trigger a detection
|
||||
min_change_percentage = processor_config.get('min_change_percentage')
|
||||
if not min_change_percentage:
|
||||
min_change_percentage = self.datastore.data['settings']['application'].get('min_change_percentage', 1)
|
||||
try:
|
||||
min_change_percentage = int(min_change_percentage)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"Invalid min_change_percentage value '{min_change_percentage}', using default 0.1")
|
||||
min_change_percentage = 1
|
||||
|
||||
# Template matching for tracking content movement
|
||||
template_matching_enabled = processor_config.get('auto_track_region', False) #@@todo disabled for now
|
||||
|
||||
if bounding_box:
|
||||
try:
|
||||
# Parse bounding box: "x,y,width,height"
|
||||
parts = [int(p.strip()) for p in bounding_box.split(',')]
|
||||
if len(parts) == 4:
|
||||
x, y, width, height = parts
|
||||
# Crop uses (left, top, right, bottom)
|
||||
crop_region = (max(0, x), max(0, y), x + width, y + height)
|
||||
logger.info(f"UUID: {watch.get('uuid')} - Bounding box enabled: cropping to region {crop_region} (x={x}, y={y}, w={width}, h={height})")
|
||||
else:
|
||||
logger.warning(f"UUID: {watch.get('uuid')} - Invalid bounding box format: {bounding_box} (expected 4 values)")
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {watch.get('uuid')} - Failed to parse bounding box '{bounding_box}': {e}")
|
||||
|
||||
# If no bounding box, check if visual selector (include_filters) is set for region-based comparison
|
||||
if not crop_region:
|
||||
include_filters = watch.get('include_filters', [])
|
||||
|
||||
if include_filters and len(include_filters) > 0:
|
||||
# Get the first filter to use for cropping
|
||||
first_filter = include_filters[0].strip()
|
||||
|
||||
if first_filter and self.xpath_data:
|
||||
try:
|
||||
import json
|
||||
# xpath_data is JSON string from browser
|
||||
xpath_data_obj = json.loads(self.xpath_data) if isinstance(self.xpath_data, str) else self.xpath_data
|
||||
|
||||
# Find the bounding box for the first filter
|
||||
for element in xpath_data_obj.get('size_pos', []):
|
||||
# Match the filter with the element's xpath
|
||||
if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'):
|
||||
# Found the element - extract crop coordinates
|
||||
left = element.get('left', 0)
|
||||
top = element.get('top', 0)
|
||||
width = element.get('width', 0)
|
||||
height = element.get('height', 0)
|
||||
|
||||
# Crop uses (left, top, right, bottom)
|
||||
crop_region = (max(0, left), max(0, top), left + width, top + height)
|
||||
|
||||
logger.info(f"UUID: {watch.get('uuid')} - Visual selector enabled: cropping to region {crop_region} for filter: {first_filter}")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {watch.get('uuid')} - Failed to parse xpath_data for visual selector: {e}")
|
||||
|
||||
# Store original crop region for template matching
|
||||
original_crop_region = crop_region
|
||||
|
||||
# Check if this is the first check (no previous history)
|
||||
history_keys = list(watch.history.keys())
|
||||
if len(history_keys) == 0:
|
||||
# First check - save baseline, no comparison
|
||||
logger.info(f"UUID: {watch.get('uuid')} - First check for watch {watch.get('uuid')} - saving baseline screenshot")
|
||||
|
||||
# LibVIPS uses automatic reference counting - no explicit cleanup needed
|
||||
update_obj = {
|
||||
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
|
||||
'last_error': False
|
||||
}
|
||||
logger.trace(f"Processed in {time.time() - now:.3f}s")
|
||||
return False, update_obj, self.screenshot
|
||||
|
||||
# Get previous screenshot bytes from history
|
||||
previous_timestamp = history_keys[-1]
|
||||
previous_screenshot_bytes = watch.get_history_snapshot(timestamp=previous_timestamp)
|
||||
|
||||
# Screenshots are stored as PNG, so this should be bytes
|
||||
if isinstance(previous_screenshot_bytes, str):
|
||||
# If it's a string (shouldn't be for screenshots, but handle it)
|
||||
previous_screenshot_bytes = previous_screenshot_bytes.encode('utf-8')
|
||||
|
||||
# Template matching is temporarily disabled pending OpenCV implementation
|
||||
# crop_region calculated above will be used as-is
|
||||
|
||||
# Perform comparison in isolated subprocess to prevent memory leaks
|
||||
try:
|
||||
from .image_handler import isolated_opencv as process_screenshot_handler
|
||||
|
||||
# stuff in watch doesnt need to be there
|
||||
logger.debug(f"UUID: {watch.get('uuid')} - Starting isolated subprocess comparison (crop_region={crop_region})")
|
||||
|
||||
# Compare using isolated subprocess with OpenCV (async-safe to avoid blocking event loop)
|
||||
# Pass raw bytes and crop region - subprocess handles all image operations
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
# Async-safe wrapper: runs coroutine in new thread with its own event loop
|
||||
# This prevents blocking the async update worker's event loop
|
||||
def run_async_in_thread():
|
||||
return asyncio.run(
|
||||
process_screenshot_handler.compare_images_isolated(
|
||||
img_bytes_from=previous_screenshot_bytes,
|
||||
img_bytes_to=self.screenshot,
|
||||
pixel_difference_threshold=pixel_difference_threshold_sensitivity,
|
||||
blur_sigma=OPENCV_BLUR_SIGMA,
|
||||
crop_region=crop_region # Pass crop region for isolated cropping
|
||||
)
|
||||
)
|
||||
|
||||
# Run in thread to avoid blocking event loop when called from async update worker
|
||||
result_container = [None]
|
||||
exception_container = [None]
|
||||
|
||||
def thread_target():
|
||||
try:
|
||||
result_container[0] = run_async_in_thread()
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
if exception_container[0]:
|
||||
raise exception_container[0]
|
||||
|
||||
# Subprocess returns only the change score - we decide if it's a "change"
|
||||
change_score = result_container[0]
|
||||
if change_score is None:
|
||||
raise RuntimeError("Image comparison subprocess returned no result")
|
||||
|
||||
changed_detected = change_score > min_change_percentage
|
||||
logger.info(f"UUID: {watch.get('uuid')} - {process_screenshot_handler.IMPLEMENTATION_NAME}: {change_score:.2f}% pixels changed, pixel_diff_threshold_sensitivity: {pixel_difference_threshold_sensitivity:.0f} score={change_score:.2f}%, min_change_threshold={min_change_percentage}%")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"UUID: {watch.get('uuid')} - Failed to compare screenshots: {e}")
|
||||
logger.trace(f"UUID: {watch.get('uuid')} - Processed in {time.time() - now:.3f}s")
|
||||
|
||||
raise ProcessorException(
|
||||
message=f"UUID: {watch.get('uuid')} - Screenshot comparison failed: {e}",
|
||||
url=watch.get('url')
|
||||
)
|
||||
|
||||
# Return results
|
||||
update_obj = {
|
||||
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
|
||||
'last_error': False
|
||||
}
|
||||
|
||||
if changed_detected:
|
||||
logger.info(f"UUID: {watch.get('uuid')} - Change detected using OpenCV! Score: {change_score:.2f}")
|
||||
else:
|
||||
logger.debug(f"UUID: {watch.get('uuid')} - No significant change using OpenCV. Score: {change_score:.2f}")
|
||||
logger.trace(f"UUID: {watch.get('uuid')} - Processed in {time.time() - now:.3f}s")
|
||||
|
||||
return changed_detected, update_obj, self.screenshot
|
||||
|
||||
@@ -0,0 +1,235 @@
|
||||
{% extends 'base.html' %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
|
||||
{% block content %}
|
||||
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='diff-image.css')}}?v={{ get_css_version() }}">
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
||||
|
||||
<div id="settings">
|
||||
<form class="pure-form " action="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid) }}" method="GET" id="diff-form">
|
||||
<fieldset class="diff-fieldset">
|
||||
{% if versions|length >= 1 %}
|
||||
<span style="white-space: nowrap;">
|
||||
<label id="change-from" for="diff-from-version" class="from-to-label">From</label>
|
||||
<select id="diff-from-version" name="from_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
|
||||
{{ version }}
|
||||
</option>
|
||||
{%- endfor -%}
|
||||
</select>
|
||||
</span>
|
||||
<span style="white-space: nowrap;">
|
||||
<label id="change-to" for="diff-to-version" class="from-to-label">To</label>
|
||||
<select id="diff-to-version" name="to_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
|
||||
{{ version }}
|
||||
</option>
|
||||
{%- endfor -%}
|
||||
</select>
|
||||
</span>
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
<fieldset id="diff-style">
|
||||
<span>
|
||||
<strong>Change Detection:</strong> {{ "%.2f"|format(change_percentage) }}% of pixels changed
|
||||
{% if change_percentage > 0.1 %}
|
||||
<span class="change-detected">⚠ Change Detected</span>
|
||||
{% else %}
|
||||
<span class="no-change">✓ No Significant Change</span>
|
||||
{% endif %}
|
||||
</span>
|
||||
</fieldset>
|
||||
{%- if versions|length >= 2 -%}
|
||||
<div id="keyboard-nav">
|
||||
<strong>Keyboard: </strong>
|
||||
<a href="" class="pure-button pure-button-primary" id="btn-previous"> ← Previous</a>
|
||||
<a class="pure-button pure-button-primary" id="btn-next" href=""> → Next</a>
|
||||
</div>
|
||||
{%- endif -%}
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div id="screenshot-comparison">
|
||||
<!-- Two-panel layout: Interactive slider + Static diff -->
|
||||
<div class="comparison-grid">
|
||||
<!-- Panel 1: Interactive Comparison Slider (Previous ↔ Current) -->
|
||||
<div class="screenshot-panel">
|
||||
<h3>Interactive Comparison</h3>
|
||||
<div class="comparison-description">
|
||||
Drag slider to compare Previous ({{ from_version|format_timestamp_timeago }})
|
||||
vs Current ({{ to_version|format_timestamp_timeago }})
|
||||
</div>
|
||||
<div style="text-align: center; margin-bottom: 0.5em; display: flex; justify-content: center; gap: 1em;">
|
||||
<a href="#" onclick="downloadImage('img-before', '{{ from_version }}'); return false;" class="download-link" title="Download previous snapshot">
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
|
||||
<path d="M8 12L3 7h3V1h4v6h3z"/>
|
||||
<path d="M1 14h14v2H1z"/>
|
||||
</svg>
|
||||
Previous
|
||||
</a>
|
||||
<a href="#" onclick="downloadImage('img-after', '{{ to_version }}'); return false;" class="download-link" title="Download current snapshot">
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
|
||||
<path d="M8 12L3 7h3V1h4v6h3z"/>
|
||||
<path d="M1 14h14v2H1z"/>
|
||||
</svg>
|
||||
Current
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div class="image-comparison" id="comparison-container">
|
||||
<!-- Before image wrapper (Previous snapshot) -->
|
||||
<div class="comparison-image-wrapper">
|
||||
<img id="img-before" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='before', from_version=from_version, to_version=to_version) }}" alt="Previous screenshot">
|
||||
</div>
|
||||
|
||||
<!-- After image wrapper (Current snapshot) -->
|
||||
<div class="comparison-image-wrapper comparison-after">
|
||||
<img id="img-after" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='after', from_version=from_version, to_version=to_version) }}" alt="Current screenshot">
|
||||
</div>
|
||||
|
||||
<!-- Labels -->
|
||||
<div class="comparison-labels">
|
||||
<span class="comparison-label">Previous</span>
|
||||
<span class="comparison-label">Current</span>
|
||||
</div>
|
||||
|
||||
<!-- Draggable slider -->
|
||||
<div class="comparison-slider" id="comparison-slider">
|
||||
<div class="comparison-handle"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Panel 2: Difference Visualization (Static) -->
|
||||
<div class="screenshot-panel diff">
|
||||
<h3>Difference Visualization</h3>
|
||||
<div class="diff-section-header">
|
||||
<span>Red = Changed Pixels</span>
|
||||
</div>
|
||||
<div style="text-align: center; margin-bottom: 0.5em;">
|
||||
<a href="#" onclick="downloadImage('diff-image', '{{ to_version }}_diff'); return false;" class="download-link" title="Download difference image">
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
|
||||
<path d="M8 12L3 7h3V1h4v6h3z"/>
|
||||
<path d="M1 14h14v2H1z"/>
|
||||
</svg>
|
||||
Download
|
||||
</a>
|
||||
</div>
|
||||
<img id="diff-image" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='rendered_diff', from_version=from_version, to_version=to_version) }}" alt="Difference visualization with red highlights">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if comparison_data and comparison_data.get('history') and comparison_data.history|length > 1 %}
|
||||
<div class="comparison-history-section">
|
||||
<h3>Comparison History</h3>
|
||||
<p>Recent comparison results (last {{ comparison_data.history|length }} checks)</p>
|
||||
<div style="overflow-x: auto;">
|
||||
<table class="pure-table pure-table-striped" style="width: 100%;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Timestamp</th>
|
||||
<th>Change %</th>
|
||||
<th>Method</th>
|
||||
<th>Changed?</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for entry in comparison_data.history|reverse %}
|
||||
<tr>
|
||||
<td>{{ entry.timestamp|format_timestamp_timeago }}</td>
|
||||
<td>{{ "%.2f"|format(entry.change_percentage) }}%</td>
|
||||
<td>{{ entry.method }}</td>
|
||||
<td>
|
||||
{% if entry.changed %}
|
||||
<span class="history-changed-yes">Yes</span>
|
||||
{% else %}
|
||||
<span class="history-changed-no">No</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function downloadImage(imageId, filename) {
|
||||
// Get the image element
|
||||
const img = document.getElementById(imageId);
|
||||
const base64Data = img.src;
|
||||
|
||||
// Convert base64 to blob
|
||||
const byteString = atob(base64Data.split(',')[1]);
|
||||
const mimeString = base64Data.split(',')[0].split(':')[1].split(';')[0];
|
||||
|
||||
const ab = new ArrayBuffer(byteString.length);
|
||||
const ia = new Uint8Array(ab);
|
||||
for (let i = 0; i < byteString.length; i++) {
|
||||
ia[i] = byteString.charCodeAt(i);
|
||||
}
|
||||
|
||||
const blob = new Blob([ab], { type: mimeString });
|
||||
|
||||
// Determine file extension from MIME type
|
||||
const extension = mimeString.includes('jpeg') ? '.jpeg' : '.png';
|
||||
|
||||
// Create download link
|
||||
const url = URL.createObjectURL(blob);
|
||||
const a = document.createElement('a');
|
||||
a.href = url;
|
||||
a.download = filename + extension;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
|
||||
// Cleanup
|
||||
setTimeout(() => {
|
||||
document.body.removeChild(a);
|
||||
URL.revokeObjectURL(url);
|
||||
}, 100);
|
||||
}
|
||||
|
||||
/**
|
||||
* Synchronize comparison slider width with diff image width
|
||||
* This ensures both panels display images at the same max-width
|
||||
*/
|
||||
function syncComparisonWidth() {
|
||||
const diffImage = document.getElementById('diff-image');
|
||||
const comparisonContainer = document.getElementById('comparison-container');
|
||||
|
||||
if (!diffImage || !comparisonContainer) return;
|
||||
|
||||
// Wait for diff image to load to get its actual rendered width
|
||||
if (diffImage.complete) {
|
||||
applyWidth();
|
||||
} else {
|
||||
diffImage.addEventListener('load', applyWidth);
|
||||
}
|
||||
|
||||
function applyWidth() {
|
||||
const diffImageWidth = diffImage.offsetWidth;
|
||||
if (diffImageWidth > 0) {
|
||||
comparisonContainer.style.maxWidth = diffImageWidth + 'px';
|
||||
comparisonContainer.style.margin = '0 auto';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run on page load
|
||||
if (document.readyState === 'loading') {
|
||||
document.addEventListener('DOMContentLoaded', syncComparisonWidth);
|
||||
} else {
|
||||
syncComparisonWidth();
|
||||
}
|
||||
|
||||
// Re-sync on window resize
|
||||
window.addEventListener('resize', syncComparisonWidth);
|
||||
</script>
|
||||
|
||||
<script src="{{ url_for('static_content', group='js', filename='comparison-slider.js') }}" defer></script>
|
||||
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,35 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block content %}
|
||||
<script src="{{ url_for('static_content', group='js', filename='preview.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="" method="GET">
|
||||
<fieldset>
|
||||
<label for="preview-version">Select timestamp</label> <select id="preview-version"
|
||||
name="version"
|
||||
class="needs-localtime">
|
||||
{% for version in versions|reverse %}
|
||||
<option value="{{ version }}" {% if version == timestamp %} selected="" {% endif %}>
|
||||
{{ version }}
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<button type="submit" class="pure-button pure-button-primary">Go</button>
|
||||
|
||||
</fieldset>
|
||||
</form>
|
||||
<br>
|
||||
<strong>Keyboard: </strong><a href="" class="pure-button pure-button-primary" id="btn-previous">
|
||||
← Previous</a> <a class="pure-button pure-button-primary" id="btn-next" href="">
|
||||
→ Next</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div id="screenshot-container" style="text-align: center; border: 1px solid #ddd; padding: 2em; background: #fafafa; border-radius: 4px;">
|
||||
<h3 style="margin-top: 0;">Screenshot from {{ timestamp|format_timestamp_timeago }}</h3>
|
||||
<img src="{{ url_for('ui.ui_preview.processor_asset', uuid=uuid, asset_name='screenshot', version=timestamp) }}"
|
||||
alt="Screenshot preview"
|
||||
style="max-width: 100%; height: auto; border: 1px solid #ccc; box-shadow: 0 2px 8px rgba(0,0,0,0.1); border-radius: 2px;">
|
||||
</div>
|
||||
{% endblock %}
|
||||
22
changedetectionio/processors/image_ssim_diff/util.py
Normal file
22
changedetectionio/processors/image_ssim_diff/util.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
DEPRECATED: All multiprocessing functions have been removed.
|
||||
|
||||
The image_ssim_diff processor now uses LibVIPS via ImageDiffHandler abstraction,
|
||||
which provides superior performance and memory efficiency through streaming
|
||||
architecture and automatic threading.
|
||||
|
||||
All image operations are now handled by:
|
||||
- imagehandler.py: Abstract base class defining the interface
|
||||
- libvips_handler.py: LibVIPS implementation with streaming and threading
|
||||
|
||||
Historical note: This file previously contained multiprocessing workers for:
|
||||
- Template matching (find_region_with_template_matching_isolated)
|
||||
- Template regeneration (regenerate_template_isolated)
|
||||
- Image cropping (crop_image_isolated, crop_pil_image_isolated)
|
||||
|
||||
These have been replaced by handler methods which are:
|
||||
- Faster (no subprocess overhead)
|
||||
- More memory efficient (LibVIPS streaming)
|
||||
- Cleaner (no multiprocessing deadlocks)
|
||||
- Better tested (no logger/forking issues)
|
||||
"""
|
||||
@@ -88,7 +88,7 @@ class guess_stream_type():
|
||||
magic_content_header = mime
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")
|
||||
logger.warning(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")
|
||||
|
||||
# Content-based detection (most reliable for text formats)
|
||||
# Check for HTML patterns first - if found, override magic's text/plain
|
||||
@@ -103,15 +103,15 @@ class guess_stream_type():
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
elif has_html_patterns or http_content_header == 'text/html':
|
||||
self.is_html = True
|
||||
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
# magic will call a rss document 'xml'
|
||||
# Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
|
||||
# This also triggers the automatic CDATA text parser so the RSS goes back a nice content list
|
||||
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
|
||||
self.is_rss = True
|
||||
elif has_html_patterns or http_content_header == 'text/html':
|
||||
self.is_html = True
|
||||
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
elif any(s in http_content_header for s in XML_CONTENT_TYPES):
|
||||
# Only mark as generic XML if not already detected as RSS
|
||||
if not self.is_rss:
|
||||
|
||||
@@ -6,34 +6,35 @@ from wtforms import (
|
||||
from wtforms.fields.choices import RadioField
|
||||
from wtforms.fields.form import FormField
|
||||
from wtforms.form import Form
|
||||
from flask_babel import lazy_gettext as _l
|
||||
|
||||
from changedetectionio.forms import processor_text_json_diff_form
|
||||
|
||||
|
||||
class RestockSettingsForm(Form):
|
||||
in_stock_processing = RadioField(label='Re-stock detection', choices=[
|
||||
('in_stock_only', "In Stock only (Out Of Stock -> In Stock only)"),
|
||||
('all_changes', "Any availability changes"),
|
||||
('off', "Off, don't follow availability/restock"),
|
||||
in_stock_processing = RadioField(label=_l('Re-stock detection'), choices=[
|
||||
('in_stock_only', _l("In Stock only (Out Of Stock -> In Stock only)")),
|
||||
('all_changes', _l("Any availability changes")),
|
||||
('off', _l("Off, don't follow availability/restock")),
|
||||
], default="in_stock_only")
|
||||
|
||||
price_change_min = FloatField('Below price to trigger notification', [validators.Optional()],
|
||||
render_kw={"placeholder": "No limit", "size": "10"})
|
||||
price_change_max = FloatField('Above price to trigger notification', [validators.Optional()],
|
||||
render_kw={"placeholder": "No limit", "size": "10"})
|
||||
price_change_threshold_percent = FloatField('Threshold in % for price changes since the original price', validators=[
|
||||
price_change_min = FloatField(_l('Below price to trigger notification'), [validators.Optional()],
|
||||
render_kw={"placeholder": _l("No limit"), "size": "10"})
|
||||
price_change_max = FloatField(_l('Above price to trigger notification'), [validators.Optional()],
|
||||
render_kw={"placeholder": _l("No limit"), "size": "10"})
|
||||
price_change_threshold_percent = FloatField(_l('Threshold in %% for price changes since the original price'), validators=[
|
||||
|
||||
validators.Optional(),
|
||||
validators.NumberRange(min=0, max=100, message="Should be between 0 and 100"),
|
||||
validators.NumberRange(min=0, max=100, message=_l("Should be between 0 and 100")),
|
||||
], render_kw={"placeholder": "0%", "size": "5"})
|
||||
|
||||
follow_price_changes = BooleanField('Follow price changes', default=True)
|
||||
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
|
||||
restock_settings = FormField(RestockSettingsForm)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return 'Restock & Price Detection'
|
||||
return _l('Restock & Price Detection')
|
||||
|
||||
def extra_form_content(self):
|
||||
output = ""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .. import difference_detection_processor
|
||||
from ..base import difference_detection_processor
|
||||
from ..exceptions import ProcessorException
|
||||
from . import Restock
|
||||
from loguru import logger
|
||||
@@ -7,8 +7,11 @@ import urllib3
|
||||
import time
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
name = 'Re-stock & Price detection for pages with a SINGLE product'
|
||||
description = 'Detects if the product goes back to in-stock'
|
||||
# Translatable strings - extracted by pybabel, translated at runtime in __init__.py
|
||||
name = 'Re-stock & Price detection for pages with a SINGLE product' # _()
|
||||
description = 'Detects if the product goes back to in-stock' # _()
|
||||
processor_weight = 1
|
||||
list_badge_text = "Restock" # _()
|
||||
|
||||
class UnableToExtractRestockData(Exception):
|
||||
def __init__(self, status_code):
|
||||
@@ -187,6 +190,8 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
|
||||
itemprop_availability = {}
|
||||
|
||||
# Try built-in extraction first, this will scan metadata in the HTML
|
||||
try:
|
||||
itemprop_availability = get_itemprop_availability(self.fetcher.content)
|
||||
except MoreThanOnePriceFound as e:
|
||||
@@ -198,6 +203,33 @@ class perform_site_check(difference_detection_processor):
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# If built-in extraction didn't get both price AND availability, try plugin override
|
||||
# Only check plugin if this watch is using a fetcher that might provide better data
|
||||
has_price = itemprop_availability.get('price') is not None
|
||||
has_availability = itemprop_availability.get('availability') is not None
|
||||
|
||||
# @TODO !!! some setting like "Use as fallback" or "always use", "t
|
||||
if not (has_price and has_availability) or True:
|
||||
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
|
||||
fetcher_name = watch.get('fetch_backend', 'html_requests')
|
||||
|
||||
# Only try plugin override if not using system default (which might be anything)
|
||||
if fetcher_name and fetcher_name != 'system':
|
||||
logger.debug("Calling extra plugins for getting item price/availability")
|
||||
plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)
|
||||
|
||||
if plugin_availability:
|
||||
# Plugin provided better data, use it
|
||||
plugin_has_price = plugin_availability.get('price') is not None
|
||||
plugin_has_availability = plugin_availability.get('availability') is not None
|
||||
|
||||
# Only use plugin data if it's actually better than what we have
|
||||
if plugin_has_price or plugin_has_availability:
|
||||
itemprop_availability = plugin_availability
|
||||
logger.info(f"Using plugin-provided availability data for fetcher '{fetcher_name}' (built-in had price={has_price}, availability={has_availability}; plugin has price={plugin_has_price}, availability={plugin_has_availability})")
|
||||
if not plugin_availability:
|
||||
logger.debug("No item price/availability from plugins")
|
||||
|
||||
# Something valid in get_itemprop_availability() by scraping metadata ?
|
||||
if itemprop_availability.get('price') or itemprop_availability.get('availability'):
|
||||
# Store for other usage
|
||||
|
||||
47
changedetectionio/processors/templates/extract.html
Normal file
47
changedetectionio/processors/templates/extract.html
Normal file
@@ -0,0 +1,47 @@
|
||||
{% extends 'base.html' %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
{% block content %}
|
||||
<div class="tabs">
|
||||
<ul>
|
||||
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">Error Text</a></li> {% endif %}
|
||||
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">Error Screenshot</a></li> {% endif %}
|
||||
<li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">Text</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">Screenshot</a></li>
|
||||
<li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">Extract Data</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div id="diff-ui">
|
||||
|
||||
<div class="xxxxtab-pane-inner" id="extract">
|
||||
<form id="extract-data-form" class="pure-form pure-form-stacked edit-form" action="{{ url_for('ui.ui_diff.diff_history_page_extract_POST', uuid=uuid) }}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
|
||||
<p>This tool will extract text data from all of the watch history.</p>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(extract_form.extract_regex) }}
|
||||
<span class="pure-form-message-inline">
|
||||
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>
|
||||
|
||||
<p>
|
||||
For example, to extract only the numbers from text ‐<br>
|
||||
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
|
||||
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
|
||||
</p>
|
||||
<p>
|
||||
<a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
|
||||
</p>
|
||||
<p>
|
||||
Each RegEx group bracket <code>()</code> will be in its own column, the first column value is always the date.
|
||||
</p>
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_button(extract_form.extract_submit_button) }}
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
@@ -32,7 +32,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
|
||||
from changedetectionio import forms, html_tools
|
||||
from changedetectionio.model.Watch import model as watch_model
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import deepcopy
|
||||
from flask import request
|
||||
import brotli
|
||||
@@ -45,6 +45,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
text_before_filter = ''
|
||||
trigger_line_numbers = []
|
||||
ignore_line_numbers = []
|
||||
blocked_line_numbers = []
|
||||
|
||||
tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
|
||||
|
||||
@@ -76,13 +77,16 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
|
||||
|
||||
# Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
|
||||
# Do this as a parallel process because it could take some time
|
||||
with ProcessPoolExecutor(max_workers=2) as executor:
|
||||
future1 = executor.submit(_task, tmp_watch, update_handler)
|
||||
future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
|
||||
# Do this as parallel threads (not processes) to avoid pickle issues with Lock objects
|
||||
try:
|
||||
with ThreadPoolExecutor(max_workers=2) as executor:
|
||||
future1 = executor.submit(_task, tmp_watch, update_handler)
|
||||
future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
|
||||
|
||||
text_after_filter = future1.result()
|
||||
text_before_filter = future2.result()
|
||||
text_after_filter = future1.result()
|
||||
text_before_filter = future2.result()
|
||||
except Exception as e:
|
||||
x=1
|
||||
|
||||
try:
|
||||
trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
|
||||
@@ -101,14 +105,23 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
except Exception as e:
|
||||
text_before_filter = f"Error: {str(e)}"
|
||||
|
||||
try:
|
||||
blocked_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
|
||||
wordlist=tmp_watch.get('text_should_not_be_present', []) + datastore.data['settings']['application'].get('text_should_not_be_present', []),
|
||||
mode='line numbers'
|
||||
)
|
||||
except Exception as e:
|
||||
text_before_filter = f"Error: {str(e)}"
|
||||
|
||||
logger.trace(f"Parsed in {time.time() - now:.3f}s")
|
||||
|
||||
return ({
|
||||
'after_filter': text_after_filter,
|
||||
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
|
||||
'duration': time.time() - now,
|
||||
'trigger_line_numbers': trigger_line_numbers,
|
||||
'ignore_line_numbers': ignore_line_numbers,
|
||||
'after_filter': text_after_filter,
|
||||
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
|
||||
'blocked_line_numbers': blocked_line_numbers,
|
||||
'duration': time.time() - now,
|
||||
'ignore_line_numbers': ignore_line_numbers,
|
||||
'trigger_line_numbers': trigger_line_numbers,
|
||||
})
|
||||
|
||||
|
||||
|
||||
232
changedetectionio/processors/text_json_diff/difference.py
Normal file
232
changedetectionio/processors/text_json_diff/difference.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
History/diff rendering for text_json_diff processor.
|
||||
|
||||
This module handles the visualization of text/HTML/JSON changes by rendering
|
||||
a side-by-side or unified diff view with syntax highlighting and change markers.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio import diff, strtobool
|
||||
from changedetectionio.diff import (
|
||||
REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
|
||||
REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
|
||||
ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
|
||||
CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
|
||||
CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
|
||||
)
|
||||
from changedetectionio.notification.handler import apply_html_color_to_body
|
||||
|
||||
|
||||
def build_diff_cell_visualizer(content, resolution=100):
|
||||
"""
|
||||
Build a visual cell grid for the diff visualizer.
|
||||
|
||||
Analyzes the content for placemarkers indicating changes and creates a
|
||||
grid of cells representing the document, with each cell marked as:
|
||||
- 'deletion' for removed content
|
||||
- 'insertion' for added content
|
||||
- 'mixed' for cells containing both deletions and insertions
|
||||
- empty string for cells with no changes
|
||||
|
||||
Args:
|
||||
content: The diff content with placemarkers
|
||||
resolution: Number of cells to create (default 100)
|
||||
|
||||
Returns:
|
||||
List of dicts with 'class' key for each cell's CSS class
|
||||
"""
|
||||
if not content:
|
||||
return [{'class': ''} for _ in range(resolution)]
|
||||
now = time.time()
|
||||
# Work with character positions for better accuracy
|
||||
content_length = len(content)
|
||||
|
||||
if content_length == 0:
|
||||
return [{'class': ''} for _ in range(resolution)]
|
||||
|
||||
chars_per_cell = max(1, content_length / resolution)
|
||||
|
||||
# Track change type for each cell
|
||||
cell_data = {}
|
||||
|
||||
# Placemarkers to detect
|
||||
change_markers = {
|
||||
REMOVED_PLACEMARKER_OPEN: 'deletion',
|
||||
ADDED_PLACEMARKER_OPEN: 'insertion',
|
||||
CHANGED_PLACEMARKER_OPEN: 'deletion',
|
||||
CHANGED_INTO_PLACEMARKER_OPEN: 'insertion',
|
||||
}
|
||||
|
||||
# Find all occurrences of each marker
|
||||
for marker, change_type in change_markers.items():
|
||||
pos = 0
|
||||
while True:
|
||||
pos = content.find(marker, pos)
|
||||
if pos == -1:
|
||||
break
|
||||
|
||||
# Calculate which cell this marker falls into
|
||||
cell_index = min(int(pos / chars_per_cell), resolution - 1)
|
||||
|
||||
if cell_index not in cell_data:
|
||||
cell_data[cell_index] = change_type
|
||||
elif cell_data[cell_index] != change_type:
|
||||
# Mixed changes in this cell
|
||||
cell_data[cell_index] = 'mixed'
|
||||
|
||||
pos += len(marker)
|
||||
|
||||
# Build the cell list
|
||||
cells = []
|
||||
for i in range(resolution):
|
||||
change_type = cell_data.get(i, '')
|
||||
cells.append({'class': change_type})
|
||||
|
||||
logger.debug(f"Built diff cell visualizer: {len([c for c in cells if c['class']])} cells with changes out of {resolution} in {time.time() - now:.2f}s")
|
||||
|
||||
return cells
|
||||
|
||||
# Diff display preferences configuration - single source of truth
|
||||
DIFF_PREFERENCES_CONFIG = {
|
||||
'changesOnly': {'default': True, 'type': 'bool'},
|
||||
'ignoreWhitespace': {'default': False, 'type': 'bool'},
|
||||
'removed': {'default': True, 'type': 'bool'},
|
||||
'added': {'default': True, 'type': 'bool'},
|
||||
'replaced': {'default': True, 'type': 'bool'},
|
||||
'type': {'default': 'diffLines', 'type': 'value'},
|
||||
}
|
||||
|
||||
def render(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
|
||||
"""
|
||||
Render the history/diff view for text/JSON/HTML changes.
|
||||
|
||||
Args:
|
||||
watch: The watch object
|
||||
datastore: The ChangeDetectionStore instance
|
||||
request: Flask request object
|
||||
url_for: Flask url_for function
|
||||
render_template: Flask render_template function
|
||||
flash: Flask flash function
|
||||
redirect: Flask redirect function
|
||||
extract_form: Optional pre-built extract form (for error cases)
|
||||
|
||||
Returns:
|
||||
Rendered HTML response
|
||||
"""
|
||||
from changedetectionio import forms
|
||||
|
||||
uuid = watch.get('uuid')
|
||||
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
|
||||
# Use provided form or create a new one
|
||||
if extract_form is None:
|
||||
extract_form = forms.extractDataForm(formdata=request.form,
|
||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
||||
)
|
||||
history = watch.history
|
||||
dates = list(history.keys())
|
||||
|
||||
# If a "from_version" was requested, then find it (or the closest one)
|
||||
# Also set "from version" to be the closest version to the one that was last viewed.
|
||||
|
||||
best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
|
||||
from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
|
||||
from_version = request.args.get('from_version', from_version_timestamp )
|
||||
|
||||
# Use the current one if nothing was specified
|
||||
to_version = request.args.get('to_version', str(dates[-1]))
|
||||
|
||||
try:
|
||||
to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
|
||||
to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
|
||||
|
||||
try:
|
||||
from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
|
||||
from_version_file_contents = f"Unable to read to-version {from_version}.\n"
|
||||
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
|
||||
|
||||
datastore.set_last_viewed(uuid, time.time())
|
||||
|
||||
# Parse diff preferences from request using config as single source of truth
|
||||
# Check if this is a user submission (any diff pref param exists in query string)
|
||||
user_submitted = any(key in request.args for key in DIFF_PREFERENCES_CONFIG.keys())
|
||||
|
||||
diff_prefs = {}
|
||||
for key, config in DIFF_PREFERENCES_CONFIG.items():
|
||||
if user_submitted:
|
||||
# User submitted form - missing checkboxes are explicitly OFF
|
||||
if config['type'] == 'bool':
|
||||
diff_prefs[key] = strtobool(request.args.get(key, 'off'))
|
||||
else:
|
||||
diff_prefs[key] = request.args.get(key, config['default'])
|
||||
else:
|
||||
# Initial load - use defaults from config
|
||||
diff_prefs[key] = config['default']
|
||||
|
||||
content = diff.render_diff(previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
include_replaced=diff_prefs['replaced'],
|
||||
include_added=diff_prefs['added'],
|
||||
include_removed=diff_prefs['removed'],
|
||||
include_equal=diff_prefs['changesOnly'],
|
||||
ignore_junk=diff_prefs['ignoreWhitespace'],
|
||||
word_diff=diff_prefs['type'] == 'diffWords',
|
||||
)
|
||||
|
||||
# Build cell grid visualizer before applying HTML color (so we can detect placemarkers)
|
||||
diff_cell_grid = build_diff_cell_visualizer(content)
|
||||
|
||||
content = apply_html_color_to_body(n_body=content)
|
||||
offscreen_content = render_template("diff-offscreen-options.html")
|
||||
|
||||
note = ''
|
||||
if str(from_version) != str(dates[-2]) or str(to_version) != str(dates[-1]):
|
||||
note = 'Note: You are not viewing the latest changes.'
|
||||
|
||||
output = render_template("diff.html",
|
||||
#initial_scroll_line_number=100,
|
||||
bottom_horizontal_offscreen_contents=offscreen_content,
|
||||
content=content,
|
||||
current_diff_url=watch['url'],
|
||||
diff_cell_grid=diff_cell_grid,
|
||||
diff_prefs=diff_prefs,
|
||||
extra_classes='difference-page',
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - {watch.label} - History",
|
||||
extract_form=extract_form,
|
||||
from_version=str(from_version),
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
newest=to_version_file_contents,
|
||||
newest_version_timestamp=dates[-1],
|
||||
note=note,
|
||||
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
||||
pure_menu_fixed=False,
|
||||
screenshot=screenshot_url,
|
||||
to_version=str(to_version),
|
||||
uuid=uuid,
|
||||
versions=dates, # All except current/last
|
||||
watch_a=watch,
|
||||
)
|
||||
return output
|
||||
@@ -7,11 +7,10 @@ import re
|
||||
import urllib3
|
||||
|
||||
from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
||||
from changedetectionio.diff import ADDED_PLACEMARKER_OPEN
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
from ..base import difference_detection_processor
|
||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
||||
from changedetectionio import html_tools, content_fetchers
|
||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
|
||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.processors.magic import guess_stream_type
|
||||
@@ -20,6 +19,8 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
name = 'Webpage Text/HTML, JSON and PDF changes'
|
||||
description = 'Detects all text changes where possible'
|
||||
processor_weight = -100
|
||||
list_badge_text = "Text"
|
||||
|
||||
JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:']
|
||||
|
||||
@@ -280,7 +281,7 @@ class ContentProcessor:
|
||||
|
||||
# Sort JSON to avoid false alerts from reordering
|
||||
try:
|
||||
content = json.dumps(json.loads(content), sort_keys=True, indent=4)
|
||||
content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False)
|
||||
except Exception:
|
||||
# Might be malformed JSON, continue anyway
|
||||
pass
|
||||
@@ -298,7 +299,7 @@ class ContentProcessor:
|
||||
xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
html_content=content,
|
||||
append_pretty_line_formatting=not self.watch.is_source_type_url,
|
||||
is_rss=stream_content_type.is_rss
|
||||
is_xml=stream_content_type.is_rss or stream_content_type.is_xml
|
||||
)
|
||||
|
||||
# XPath1 filters (first match only)
|
||||
@@ -307,7 +308,7 @@ class ContentProcessor:
|
||||
xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=content,
|
||||
append_pretty_line_formatting=not self.watch.is_source_type_url,
|
||||
is_rss=stream_content_type.is_rss
|
||||
is_xml=stream_content_type.is_rss or stream_content_type.is_xml
|
||||
)
|
||||
|
||||
# JSON filters
|
||||
@@ -468,6 +469,7 @@ class perform_site_check(difference_detection_processor):
|
||||
c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True)
|
||||
return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8')
|
||||
|
||||
|
||||
# === EMPTY PAGE CHECK ===
|
||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||
if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0:
|
||||
@@ -584,7 +586,6 @@ class perform_site_check(difference_detection_processor):
|
||||
include_added=watch.get('filter_text_added', True),
|
||||
include_removed=watch.get('filter_text_removed', True),
|
||||
include_replaced=watch.get('filter_text_replaced', True),
|
||||
line_feed_sep="\n",
|
||||
include_change_type_prefix=False
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[pytest]
|
||||
addopts = --no-start-live-server --live-server-port=5005
|
||||
addopts = --no-start-live-server --live-server-port=0
|
||||
#testpaths = tests pytest_invenio
|
||||
#live_server_scope = function
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user